xref: /illumos-gate/usr/src/uts/common/os/streamio.c (revision bbf21555)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved	*/
23 
24 
25 /*
26  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27  * Copyright 2017 Joyent, Inc.
28  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/sysmacros.h>
33 #include <sys/param.h>
34 #include <sys/errno.h>
35 #include <sys/signal.h>
36 #include <sys/stat.h>
37 #include <sys/proc.h>
38 #include <sys/cred.h>
39 #include <sys/user.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/stream.h>
43 #include <sys/strsubr.h>
44 #include <sys/stropts.h>
45 #include <sys/tihdr.h>
46 #include <sys/var.h>
47 #include <sys/poll.h>
48 #include <sys/termio.h>
49 #include <sys/ttold.h>
50 #include <sys/systm.h>
51 #include <sys/uio.h>
52 #include <sys/cmn_err.h>
53 #include <sys/sad.h>
54 #include <sys/netstack.h>
55 #include <sys/priocntl.h>
56 #include <sys/jioctl.h>
57 #include <sys/procset.h>
58 #include <sys/session.h>
59 #include <sys/kmem.h>
60 #include <sys/filio.h>
61 #include <sys/vtrace.h>
62 #include <sys/debug.h>
63 #include <sys/strredir.h>
64 #include <sys/fs/fifonode.h>
65 #include <sys/fs/snode.h>
66 #include <sys/strlog.h>
67 #include <sys/strsun.h>
68 #include <sys/project.h>
69 #include <sys/kbio.h>
70 #include <sys/msio.h>
71 #include <sys/tty.h>
72 #include <sys/ptyvar.h>
73 #include <sys/vuid_event.h>
74 #include <sys/modctl.h>
75 #include <sys/sunddi.h>
76 #include <sys/sunldi_impl.h>
77 #include <sys/autoconf.h>
78 #include <sys/policy.h>
79 #include <sys/dld.h>
80 #include <sys/zone.h>
81 #include <sys/ptms.h>
82 #include <sys/limits.h>
83 #include <c2/audit.h>
84 
/*
 * Inline front end for straccess().
 *
 * This keeps an old streams performance enhancement while leaving the
 * call sites readable: the cheap checks are performed at the call site,
 * thereby avoiding unnecessary calls to straccess().  The macro evaluates
 * to 0 without calling straccess() when the stream has no session
 * (sd_sidp is NULL) or when the stream's vnode is a VFIFO; otherwise it
 * evaluates to the result of straccess(x, y).
 *
 * NOTE: the expansion refers to a variable named `stp', which must be in
 * scope wherever the macro is used.
 */
#define	i_straccess(x, y)					\
	(((stp->sd_sidp == NULL) ||				\
	    (stp->sd_vnode->v_type == VFIFO)) ?			\
	    0 : straccess((x), (y)))
96 
97 /*
98  * what is mblk_pull_len?
99  *
100  * If a streams message consists of many short messages,
101  * a performance degradation occurs from copyout overhead.
102  * To decrease the per mblk overhead, messages that are
103  * likely to consist of many small mblks are pulled up into
104  * one continuous chunk of memory.
105  *
106  * To avoid the processing overhead of examining every
107  * mblk, a quick heuristic is used. If the first mblk in
108  * the message is shorter than mblk_pull_len, it is likely
109  * that the rest of the mblks will be short.
110  *
111  * This heuristic was decided upon after performance tests
112  * indicated that anything more complex slowed down the main
113  * code path.
114  */
115 #define	MBLK_PULL_LEN 64
116 uint32_t mblk_pull_len = MBLK_PULL_LEN;
117 
118 /*
119  * The sgttyb_handling flag controls the handling of the old BSD
120  * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
121  *
122  * 0 - Emit no warnings at all and retain old, broken behavior.
123  * 1 - Emit no warnings and silently handle new semantics.
124  * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
125  *     (once per system invocation).  Handle with new semantics.
126  * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
127  *     made (so that offenders drop core and are easy to debug).
128  *
129  * The "new semantics" are that TIOCGETP returns B38400 for
130  * sg_[io]speed if the corresponding value is over B38400, and that
131  * TIOCSET[PN] accept B38400 in these cases to mean "retain current
132  * bit rate."
133  */
134 int sgttyb_handling = 1;
135 static boolean_t sgttyb_complaint;
136 
137 /* don't push drcompat module by default on Style-2 streams */
138 static int push_drcompat = 0;
139 
140 /*
141  * id value used to distinguish between different ioctl messages
142  */
143 static uint32_t ioc_id;
144 
145 static void putback(struct stdata *, queue_t *, mblk_t *, int);
146 static void strcleanall(struct vnode *);
147 static int strwsrv(queue_t *);
148 static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
149 
150 /*
151  * qinit and module_info structures for stream head read and write queues
152  */
153 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
154 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
155 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
156 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
157 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
158     FIFOLOWAT };
159 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
160 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
161 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
162 
163 extern kmutex_t	strresources;	/* protects global resources */
164 extern kmutex_t muxifier;	/* single-threads multiplexor creation */
165 
166 static boolean_t msghasdata(mblk_t *bp);
167 #define	msgnodata(bp) (!msghasdata(bp))
168 
169 /*
170  * Stream head locking notes:
171  *	There are four monitors associated with the stream head:
172  *	1. v_stream monitor: in stropen() and strclose() v_lock
173  *		is held while the association of vnode and stream
174  *		head is established or tested for.
175  *	2. open/close/push/pop monitor: sd_lock is held while each
176  *		thread bids for exclusive access to this monitor
177  *		for opening or closing a stream.  In addition, this
178  *		monitor is entered during pushes and pops.  This
179  *		guarantees that during plumbing operations there
180  *		is only one thread trying to change the plumbing.
181  *		Any other threads present in the stream are only
182  *		using the plumbing.
183  *	3. read/write monitor: in the case of read, a thread holds
184  *		sd_lock while trying to get data from the stream
185  *		head queue.  if there is none to fulfill a read
186  *		request, it sets RSLEEP and calls cv_wait_sig() down
187  *		in strwaitq() to await the arrival of new data.
188  *		when new data arrives in strrput(), sd_lock is acquired
189  *		before testing for RSLEEP and calling cv_broadcast().
190  *		the behavior of strwrite(), strwsrv(), and WSLEEP
191  *		mirror this.
192  *	4. ioctl monitor: sd_lock is gotten to ensure that only one
193  *		thread is doing an ioctl at a time.
194  */
195 
196 static int
push_mod(queue_t * qp,dev_t * devp,struct stdata * stp,const char * name,int anchor,cred_t * crp,uint_t anchor_zoneid)197 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
198     int anchor, cred_t *crp, uint_t anchor_zoneid)
199 {
200 	int error;
201 	fmodsw_impl_t *fp;
202 
203 	if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
204 		error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
205 		return (error);
206 	}
207 	if (stp->sd_pushcnt >= nstrpush) {
208 		return (EINVAL);
209 	}
210 
211 	if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
212 		stp->sd_flag |= STREOPENFAIL;
213 		return (EINVAL);
214 	}
215 
216 	/*
217 	 * push new module and call its open routine via qattach
218 	 */
219 	if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
220 		return (error);
221 
222 	/*
223 	 * Check to see if caller wants a STREAMS anchor
224 	 * put at this place in the stream, and add if so.
225 	 */
226 	mutex_enter(&stp->sd_lock);
227 	if (anchor == stp->sd_pushcnt) {
228 		stp->sd_anchor = stp->sd_pushcnt;
229 		stp->sd_anchorzone = anchor_zoneid;
230 	}
231 	mutex_exit(&stp->sd_lock);
232 
233 	return (0);
234 }
235 
236 static int
xpg4_fixup(queue_t * qp,dev_t * devp,struct stdata * stp,cred_t * crp)237 xpg4_fixup(queue_t *qp, dev_t *devp, struct stdata *stp, cred_t *crp)
238 {
239 	static const char *ptsmods[] = {
240 	    "ptem", "ldterm", "ttcompat"
241 	};
242 	dev_t dummydev = *devp;
243 	struct strioctl strioc;
244 	zoneid_t zoneid;
245 	int32_t rval;
246 	uint_t i;
247 
248 	/*
249 	 * Push modules required for the slave PTY to have terminal
250 	 * semantics out of the box; this is required by XPG4v2.
251 	 * These three modules are flagged as single-instance so that
252 	 * the system will never end up with duplicate copies pushed
253 	 * onto a stream.
254 	 */
255 
256 	zoneid = crgetzoneid(crp);
257 	for (i = 0; i < ARRAY_SIZE(ptsmods); i++) {
258 		int error;
259 
260 		error = push_mod(qp, &dummydev, stp, ptsmods[i], 0,
261 		    crp, zoneid);
262 		if (error != 0)
263 			return (error);
264 	}
265 
266 	/*
267 	 * Send PTSSTTY down the stream
268 	 */
269 
270 	strioc.ic_cmd = PTSSTTY;
271 	strioc.ic_timout = 0;
272 	strioc.ic_len = 0;
273 	strioc.ic_dp = NULL;
274 
275 	(void) strdoioctl(stp, &strioc, FNATIVE, K_TO_K, crp, &rval);
276 
277 	return (0);
278 }
279 
280 /*
281  * Open a stream device.
282  */
283 int
stropen(vnode_t * vp,dev_t * devp,int flag,cred_t * crp)284 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
285 {
286 	struct stdata *stp;
287 	queue_t *qp;
288 	int s;
289 	dev_t dummydev, savedev;
290 	struct autopush *ap;
291 	struct dlautopush dlap;
292 	int error = 0;
293 	ssize_t	rmin, rmax;
294 	int cloneopen;
295 	queue_t *brq;
296 	major_t major;
297 	str_stack_t *ss;
298 	zoneid_t zoneid;
299 	uint_t anchor;
300 
301 	/*
302 	 * If the stream already exists, wait for any open in progress
303 	 * to complete, then call the open function of each module and
304 	 * driver in the stream.  Otherwise create the stream.
305 	 */
306 	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
307 retry:
308 	mutex_enter(&vp->v_lock);
309 	if ((stp = vp->v_stream) != NULL) {
310 
311 		/*
312 		 * Waiting for stream to be created to device
313 		 * due to another open.
314 		 */
315 		mutex_exit(&vp->v_lock);
316 
317 		if (STRMATED(stp)) {
318 			struct stdata *strmatep = stp->sd_mate;
319 
320 			STRLOCKMATES(stp);
321 			if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
322 				if (flag & (FNDELAY|FNONBLOCK)) {
323 					error = EAGAIN;
324 					mutex_exit(&strmatep->sd_lock);
325 					goto ckreturn;
326 				}
327 				mutex_exit(&stp->sd_lock);
328 				if (!cv_wait_sig(&strmatep->sd_monitor,
329 				    &strmatep->sd_lock)) {
330 					error = EINTR;
331 					mutex_exit(&strmatep->sd_lock);
332 					mutex_enter(&stp->sd_lock);
333 					goto ckreturn;
334 				}
335 				mutex_exit(&strmatep->sd_lock);
336 				goto retry;
337 			}
338 			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
339 				if (flag & (FNDELAY|FNONBLOCK)) {
340 					error = EAGAIN;
341 					mutex_exit(&strmatep->sd_lock);
342 					goto ckreturn;
343 				}
344 				mutex_exit(&strmatep->sd_lock);
345 				if (!cv_wait_sig(&stp->sd_monitor,
346 				    &stp->sd_lock)) {
347 					error = EINTR;
348 					goto ckreturn;
349 				}
350 				mutex_exit(&stp->sd_lock);
351 				goto retry;
352 			}
353 
354 			if (stp->sd_flag & (STRDERR|STWRERR)) {
355 				error = EIO;
356 				mutex_exit(&strmatep->sd_lock);
357 				goto ckreturn;
358 			}
359 
360 			stp->sd_flag |= STWOPEN;
361 			STRUNLOCKMATES(stp);
362 		} else {
363 			mutex_enter(&stp->sd_lock);
364 			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
365 				if (flag & (FNDELAY|FNONBLOCK)) {
366 					error = EAGAIN;
367 					goto ckreturn;
368 				}
369 				if (!cv_wait_sig(&stp->sd_monitor,
370 				    &stp->sd_lock)) {
371 					error = EINTR;
372 					goto ckreturn;
373 				}
374 				mutex_exit(&stp->sd_lock);
375 				goto retry;  /* could be clone! */
376 			}
377 
378 			if (stp->sd_flag & (STRDERR|STWRERR)) {
379 				error = EIO;
380 				goto ckreturn;
381 			}
382 
383 			stp->sd_flag |= STWOPEN;
384 			mutex_exit(&stp->sd_lock);
385 		}
386 
387 		/*
388 		 * Open all modules and devices down stream to notify
389 		 * that another user is streaming.  For modules, set the
390 		 * last argument to MODOPEN and do not pass any open flags.
391 		 * Ignore dummydev since this is not the first open.
392 		 */
393 		claimstr(stp->sd_wrq);
394 		qp = stp->sd_wrq;
395 		while (_SAMESTR(qp)) {
396 			qp = qp->q_next;
397 			if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
398 				break;
399 		}
400 		releasestr(stp->sd_wrq);
401 		mutex_enter(&stp->sd_lock);
402 		stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
403 		stp->sd_rerror = 0;
404 		stp->sd_werror = 0;
405 ckreturn:
406 		cv_broadcast(&stp->sd_monitor);
407 		mutex_exit(&stp->sd_lock);
408 		return (error);
409 	}
410 
411 	/*
412 	 * This vnode isn't streaming.  SPECFS already
413 	 * checked for multiple vnodes pointing to the
414 	 * same stream, so create a stream to the driver.
415 	 */
416 	qp = allocq();
417 	stp = shalloc(qp);
418 
419 	/*
420 	 * Initialize stream head.  shalloc() has given us
421 	 * exclusive access, and we have the vnode locked;
422 	 * we can do whatever we want with stp.
423 	 */
424 	stp->sd_flag = STWOPEN;
425 	stp->sd_siglist = NULL;
426 	stp->sd_pollist.ph_list = NULL;
427 	stp->sd_sigflags = 0;
428 	stp->sd_mark = NULL;
429 	stp->sd_closetime = STRTIMOUT;
430 	stp->sd_sidp = NULL;
431 	stp->sd_pgidp = NULL;
432 	stp->sd_vnode = vp;
433 	stp->sd_pvnode = NULL;
434 	stp->sd_rerror = 0;
435 	stp->sd_werror = 0;
436 	stp->sd_wroff = 0;
437 	stp->sd_tail = 0;
438 	stp->sd_iocblk = NULL;
439 	stp->sd_cmdblk = NULL;
440 	stp->sd_pushcnt = 0;
441 	stp->sd_qn_minpsz = 0;
442 	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
443 	stp->sd_maxblk = INFPSZ;
444 	qp->q_ptr = _WR(qp)->q_ptr = stp;
445 	STREAM(qp) = STREAM(_WR(qp)) = stp;
446 	vp->v_stream = stp;
447 	mutex_exit(&vp->v_lock);
448 	if (vp->v_type == VFIFO) {
449 		stp->sd_flag |= OLDNDELAY;
450 		/*
451 		 * This means, both for pipes and fifos
452 		 * strwrite will send SIGPIPE if the other
453 		 * end is closed. For putmsg it depends
454 		 * on whether it is a XPG4_2 application
455 		 * or not
456 		 */
457 		stp->sd_wput_opt = SW_SIGPIPE;
458 
459 		/* setq might sleep in kmem_alloc - avoid holding locks. */
460 		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
461 		    SQ_CI|SQ_CO, B_FALSE);
462 
463 		set_qend(qp);
464 		stp->sd_strtab = fifo_getinfo();
465 		_WR(qp)->q_nfsrv = _WR(qp);
466 		qp->q_nfsrv = qp;
467 		/*
468 		 * Wake up others that are waiting for stream to be created.
469 		 */
470 		mutex_enter(&stp->sd_lock);
471 		/*
472 		 * nothing is be pushed on stream yet, so
473 		 * optimized stream head packetsizes are just that
474 		 * of the read queue
475 		 */
476 		stp->sd_qn_minpsz = qp->q_minpsz;
477 		stp->sd_qn_maxpsz = qp->q_maxpsz;
478 		stp->sd_flag &= ~STWOPEN;
479 		goto fifo_opendone;
480 	}
481 	/* setq might sleep in kmem_alloc - avoid holding locks. */
482 	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
483 
484 	set_qend(qp);
485 
486 	/*
487 	 * Open driver and create stream to it (via qattach).
488 	 */
489 	savedev = *devp;
490 	cloneopen = (getmajor(*devp) == clone_major);
491 	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
492 		mutex_enter(&vp->v_lock);
493 		vp->v_stream = NULL;
494 		mutex_exit(&vp->v_lock);
495 		mutex_enter(&stp->sd_lock);
496 		cv_broadcast(&stp->sd_monitor);
497 		mutex_exit(&stp->sd_lock);
498 		freeq(_RD(qp));
499 		shfree(stp);
500 		return (error);
501 	}
502 	/*
503 	 * Set sd_strtab after open in order to handle clonable drivers
504 	 */
505 	stp->sd_strtab = STREAMSTAB(getmajor(*devp));
506 
507 	/*
508 	 * Historical note: dummydev used to be be prior to the initial
509 	 * open (via qattach above), which made the value seen
510 	 * inconsistent between an I_PUSH and an autopush of a module.
511 	 */
512 	dummydev = *devp;
513 
514 	/*
515 	 * For clone open of old style (Q not associated) network driver,
516 	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
517 	 */
518 	brq = _RD(_WR(qp)->q_next);
519 	major = getmajor(*devp);
520 	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
521 	    ((brq->q_flag & _QASSOCIATED) == 0)) {
522 		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
523 			cmn_err(CE_WARN, "cannot push " DRMODNAME
524 			    " streams module");
525 	}
526 
527 	if (!NETWORK_DRV(major)) {
528 		savedev = *devp;
529 	} else {
530 		/*
531 		 * For network devices, process differently based on the
532 		 * return value from dld_autopush():
533 		 *
534 		 *   0: the passed-in device points to a GLDv3 datalink with
535 		 *   per-link autopush configuration; use that configuration
536 		 *   and ignore any per-driver autopush configuration.
537 		 *
538 		 *   1: the passed-in device points to a physical GLDv3
539 		 *   datalink without per-link autopush configuration.  The
540 		 *   passed in device was changed to refer to the actual
541 		 *   physical device (if it's not already); we use that new
542 		 *   device to look up any per-driver autopush configuration.
543 		 *
544 		 *   -1: neither of the above cases applied; use the initial
545 		 *   device to look up any per-driver autopush configuration.
546 		 */
547 		switch (dld_autopush(&savedev, &dlap)) {
548 		case 0:
549 			zoneid = crgetzoneid(crp);
550 			for (s = 0; s < dlap.dap_npush; s++) {
551 				error = push_mod(qp, &dummydev, stp,
552 				    dlap.dap_aplist[s], dlap.dap_anchor, crp,
553 				    zoneid);
554 				if (error != 0)
555 					break;
556 			}
557 			goto opendone;
558 		case 1:
559 			break;
560 		case -1:
561 			savedev = *devp;
562 			break;
563 		}
564 	}
565 	/*
566 	 * Find the autopush configuration based on "savedev". Start with the
567 	 * global zone. If not found check in the local zone.
568 	 */
569 	zoneid = GLOBAL_ZONEID;
570 retryap:
571 	ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
572 	    netstack_str;
573 	if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
574 		netstack_rele(ss->ss_netstack);
575 		if (zoneid == GLOBAL_ZONEID) {
576 			/*
577 			 * None found. Also look in the zone's autopush table.
578 			 */
579 			zoneid = crgetzoneid(crp);
580 			if (zoneid != GLOBAL_ZONEID)
581 				goto retryap;
582 		}
583 		goto opendone;
584 	}
585 	anchor = ap->ap_anchor;
586 	zoneid = crgetzoneid(crp);
587 	for (s = 0; s < ap->ap_npush; s++) {
588 		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
589 		    anchor, crp, zoneid);
590 		if (error != 0)
591 			break;
592 	}
593 	sad_ap_rele(ap, ss);
594 	netstack_rele(ss->ss_netstack);
595 
596 opendone:
597 
598 	if (error == 0 &&
599 	    (stp->sd_flag & (STRISTTY|STRXPG4TTY)) == (STRISTTY|STRXPG4TTY)) {
600 		error = xpg4_fixup(qp, devp, stp, crp);
601 	}
602 
603 	/*
604 	 * let specfs know that open failed part way through
605 	 */
606 	if (error != 0) {
607 		mutex_enter(&stp->sd_lock);
608 		stp->sd_flag |= STREOPENFAIL;
609 		mutex_exit(&stp->sd_lock);
610 	}
611 
612 	/*
613 	 * Wake up others that are waiting for stream to be created.
614 	 */
615 	mutex_enter(&stp->sd_lock);
616 	stp->sd_flag &= ~STWOPEN;
617 
618 	/*
619 	 * As a performance concern we are caching the values of
620 	 * q_minpsz and q_maxpsz of the module below the stream
621 	 * head in the stream head.
622 	 */
623 	mutex_enter(QLOCK(stp->sd_wrq->q_next));
624 	rmin = stp->sd_wrq->q_next->q_minpsz;
625 	rmax = stp->sd_wrq->q_next->q_maxpsz;
626 	mutex_exit(QLOCK(stp->sd_wrq->q_next));
627 
628 	/* do this processing here as a performance concern */
629 	if (strmsgsz != 0) {
630 		if (rmax == INFPSZ)
631 			rmax = strmsgsz;
632 		else
633 			rmax = MIN(strmsgsz, rmax);
634 	}
635 
636 	mutex_enter(QLOCK(stp->sd_wrq));
637 	stp->sd_qn_minpsz = rmin;
638 	stp->sd_qn_maxpsz = rmax;
639 	mutex_exit(QLOCK(stp->sd_wrq));
640 
641 fifo_opendone:
642 	cv_broadcast(&stp->sd_monitor);
643 	mutex_exit(&stp->sd_lock);
644 	return (error);
645 }
646 
647 static int strsink(queue_t *, mblk_t *);
648 static struct qinit deadrend = {
649 	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
650 };
651 static struct qinit deadwend = {
652 	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
653 };
654 
655 /*
656  * Close a stream.
657  * This is called from closef() on the last close of an open stream.
658  * Strclean() will already have removed the siglist and pollist
659  * information, so all that remains is to remove all multiplexor links
660  * for the stream, pop all the modules (and the driver), and free the
661  * stream structure.
662  */
663 
664 int
strclose(struct vnode * vp,int flag,cred_t * crp)665 strclose(struct vnode *vp, int flag, cred_t *crp)
666 {
667 	struct stdata *stp;
668 	queue_t *qp;
669 	int rval;
670 	int freestp = 1;
671 	queue_t *rmq;
672 
673 	TRACE_1(TR_FAC_STREAMS_FR,
674 	    TR_STRCLOSE, "strclose:%p", vp);
675 	ASSERT(vp->v_stream);
676 
677 	stp = vp->v_stream;
678 	ASSERT(!(stp->sd_flag & STPLEX));
679 	qp = stp->sd_wrq;
680 
681 	/*
682 	 * Needed so that strpoll will return non-zero for this fd.
683 	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
684 	 */
685 	mutex_enter(&stp->sd_lock);
686 	stp->sd_flag |= STRHUP;
687 	mutex_exit(&stp->sd_lock);
688 
689 	/*
690 	 * If the registered process or process group did not have an
691 	 * open instance of this stream then strclean would not be
692 	 * called. Thus at the time of closing all remaining siglist entries
693 	 * are removed.
694 	 */
695 	if (stp->sd_siglist != NULL)
696 		strcleanall(vp);
697 
698 	ASSERT(stp->sd_siglist == NULL);
699 	ASSERT(stp->sd_sigflags == 0);
700 
701 	if (STRMATED(stp)) {
702 		struct stdata *strmatep = stp->sd_mate;
703 		int waited = 1;
704 
705 		STRLOCKMATES(stp);
706 		while (waited) {
707 			waited = 0;
708 			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
709 				mutex_exit(&strmatep->sd_lock);
710 				cv_wait(&stp->sd_monitor, &stp->sd_lock);
711 				mutex_exit(&stp->sd_lock);
712 				STRLOCKMATES(stp);
713 				waited = 1;
714 			}
715 			while (strmatep->sd_flag &
716 			    (STWOPEN|STRCLOSE|STRPLUMB)) {
717 				mutex_exit(&stp->sd_lock);
718 				cv_wait(&strmatep->sd_monitor,
719 				    &strmatep->sd_lock);
720 				mutex_exit(&strmatep->sd_lock);
721 				STRLOCKMATES(stp);
722 				waited = 1;
723 			}
724 		}
725 		stp->sd_flag |= STRCLOSE;
726 		STRUNLOCKMATES(stp);
727 	} else {
728 		mutex_enter(&stp->sd_lock);
729 		stp->sd_flag |= STRCLOSE;
730 		mutex_exit(&stp->sd_lock);
731 	}
732 
733 	ASSERT(qp->q_first == NULL);	/* No more delayed write */
734 
735 	/* Check if an I_LINK was ever done on this stream */
736 	if (stp->sd_flag & STRHASLINKS) {
737 		netstack_t *ns;
738 		str_stack_t *ss;
739 
740 		ns = netstack_find_by_cred(crp);
741 		ASSERT(ns != NULL);
742 		ss = ns->netstack_str;
743 		ASSERT(ss != NULL);
744 
745 		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
746 		netstack_rele(ss->ss_netstack);
747 	}
748 
749 	while (_SAMESTR(qp)) {
750 		/*
751 		 * Holding sd_lock prevents q_next from changing in
752 		 * this stream.
753 		 */
754 		mutex_enter(&stp->sd_lock);
755 		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
756 
757 			/*
758 			 * sleep until awakened by strwsrv() or timeout
759 			 */
760 			for (;;) {
761 				mutex_enter(QLOCK(qp->q_next));
762 				if (!(qp->q_next->q_mblkcnt)) {
763 					mutex_exit(QLOCK(qp->q_next));
764 					break;
765 				}
766 				stp->sd_flag |= WSLEEP;
767 
768 				/* ensure strwsrv gets enabled */
769 				qp->q_next->q_flag |= QWANTW;
770 				mutex_exit(QLOCK(qp->q_next));
771 				/* get out if we timed out or recv'd a signal */
772 				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
773 				    stp->sd_closetime, 0) <= 0) {
774 					break;
775 				}
776 			}
777 			stp->sd_flag &= ~WSLEEP;
778 		}
779 		mutex_exit(&stp->sd_lock);
780 
781 		rmq = qp->q_next;
782 		if (rmq->q_flag & QISDRV) {
783 			ASSERT(!_SAMESTR(rmq));
784 			wait_sq_svc(_RD(qp)->q_syncq);
785 		}
786 
787 		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
788 	}
789 
790 	/*
791 	 * Since we call pollwakeup in close() now, the poll list should
792 	 * be empty in most cases. The only exception is the layered devices
793 	 * (e.g. the console drivers with redirection modules pushed on top
794 	 * of it).  We have to do this after calling qdetach() because
795 	 * the redirection module won't have torn down the console
796 	 * redirection until after qdetach() has been invoked.
797 	 */
798 	if (stp->sd_pollist.ph_list != NULL) {
799 		pollwakeup(&stp->sd_pollist, POLLERR);
800 		pollhead_clean(&stp->sd_pollist);
801 	}
802 	ASSERT(stp->sd_pollist.ph_list == NULL);
803 	ASSERT(stp->sd_sidp == NULL);
804 	ASSERT(stp->sd_pgidp == NULL);
805 
806 	/* Prevent qenable from re-enabling the stream head queue */
807 	disable_svc(_RD(qp));
808 
809 	/*
810 	 * Wait until service procedure of each queue is
811 	 * run, if QINSERVICE is set.
812 	 */
813 	wait_svc(_RD(qp));
814 
815 	/*
816 	 * Now, flush both queues.
817 	 */
818 	flushq(_RD(qp), FLUSHALL);
819 	flushq(qp, FLUSHALL);
820 
821 	/*
822 	 * If the write queue of the stream head is pointing to a
823 	 * read queue, we have a twisted stream.  If the read queue
824 	 * is alive, convert the stream head queues into a dead end.
825 	 * If the read queue is dead, free the dead pair.
826 	 */
827 	if (qp->q_next && !_SAMESTR(qp)) {
828 		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
829 			flushq(qp->q_next, FLUSHALL); /* ensure no message */
830 			shfree(qp->q_next->q_stream);
831 			freeq(qp->q_next);
832 			freeq(_RD(qp));
833 		} else if (qp->q_next == _RD(qp)) {	/* fifo */
834 			freeq(_RD(qp));
835 		} else {				/* pipe */
836 			freestp = 0;
837 			/*
838 			 * The q_info pointers are never accessed when
839 			 * SQLOCK is held.
840 			 */
841 			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
842 			mutex_enter(SQLOCK(qp->q_syncq));
843 			qp->q_qinfo = &deadwend;
844 			_RD(qp)->q_qinfo = &deadrend;
845 			mutex_exit(SQLOCK(qp->q_syncq));
846 		}
847 	} else {
848 		freeq(_RD(qp)); /* free stream head queue pair */
849 	}
850 
851 	mutex_enter(&vp->v_lock);
852 	if (stp->sd_iocblk) {
853 		if (stp->sd_iocblk != (mblk_t *)-1) {
854 			freemsg(stp->sd_iocblk);
855 		}
856 		stp->sd_iocblk = NULL;
857 	}
858 	stp->sd_vnode = stp->sd_pvnode = NULL;
859 	vp->v_stream = NULL;
860 	mutex_exit(&vp->v_lock);
861 	mutex_enter(&stp->sd_lock);
862 	freemsg(stp->sd_cmdblk);
863 	stp->sd_cmdblk = NULL;
864 	stp->sd_flag &= ~STRCLOSE;
865 	cv_broadcast(&stp->sd_monitor);
866 	mutex_exit(&stp->sd_lock);
867 
868 	if (freestp)
869 		shfree(stp);
870 	return (0);
871 }
872 
873 static int
strsink(queue_t * q,mblk_t * bp)874 strsink(queue_t *q, mblk_t *bp)
875 {
876 	struct copyresp *resp;
877 
878 	switch (bp->b_datap->db_type) {
879 	case M_FLUSH:
880 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
881 			*bp->b_rptr &= ~FLUSHR;
882 			bp->b_flag |= MSGNOLOOP;
883 			/*
884 			 * Protect against the driver passing up
885 			 * messages after it has done a qprocsoff.
886 			 */
887 			if (_OTHERQ(q)->q_next == NULL)
888 				freemsg(bp);
889 			else
890 				qreply(q, bp);
891 		} else {
892 			freemsg(bp);
893 		}
894 		break;
895 
896 	case M_COPYIN:
897 	case M_COPYOUT:
898 		if (bp->b_cont) {
899 			freemsg(bp->b_cont);
900 			bp->b_cont = NULL;
901 		}
902 		bp->b_datap->db_type = M_IOCDATA;
903 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
904 		resp = (struct copyresp *)bp->b_rptr;
905 		resp->cp_rval = (caddr_t)1;	/* failure */
906 		/*
907 		 * Protect against the driver passing up
908 		 * messages after it has done a qprocsoff.
909 		 */
910 		if (_OTHERQ(q)->q_next == NULL)
911 			freemsg(bp);
912 		else
913 			qreply(q, bp);
914 		break;
915 
916 	case M_IOCTL:
917 		if (bp->b_cont) {
918 			freemsg(bp->b_cont);
919 			bp->b_cont = NULL;
920 		}
921 		bp->b_datap->db_type = M_IOCNAK;
922 		/*
923 		 * Protect against the driver passing up
924 		 * messages after it has done a qprocsoff.
925 		 */
926 		if (_OTHERQ(q)->q_next == NULL)
927 			freemsg(bp);
928 		else
929 			qreply(q, bp);
930 		break;
931 
932 	default:
933 		freemsg(bp);
934 		break;
935 	}
936 
937 	return (0);
938 }
939 
940 /*
941  * Clean up after a process when it closes a stream.  This is called
942  * from closef for all closes, whereas strclose is called only for the
943  * last close on a stream.  The siglist is scanned for entries for the
944  * current process, and these are removed.
945  */
946 void
strclean(struct vnode * vp)947 strclean(struct vnode *vp)
948 {
949 	strsig_t *ssp, *pssp, *tssp;
950 	stdata_t *stp;
951 	int update = 0;
952 
953 	TRACE_1(TR_FAC_STREAMS_FR,
954 	    TR_STRCLEAN, "strclean:%p", vp);
955 	stp = vp->v_stream;
956 	pssp = NULL;
957 	mutex_enter(&stp->sd_lock);
958 	ssp = stp->sd_siglist;
959 	while (ssp) {
960 		if (ssp->ss_pidp == curproc->p_pidp) {
961 			tssp = ssp->ss_next;
962 			if (pssp)
963 				pssp->ss_next = tssp;
964 			else
965 				stp->sd_siglist = tssp;
966 			mutex_enter(&pidlock);
967 			PID_RELE(ssp->ss_pidp);
968 			mutex_exit(&pidlock);
969 			kmem_free(ssp, sizeof (strsig_t));
970 			update = 1;
971 			ssp = tssp;
972 		} else {
973 			pssp = ssp;
974 			ssp = ssp->ss_next;
975 		}
976 	}
977 	if (update) {
978 		stp->sd_sigflags = 0;
979 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
980 			stp->sd_sigflags |= ssp->ss_events;
981 	}
982 	mutex_exit(&stp->sd_lock);
983 }
984 
985 /*
986  * Used on the last close to remove any remaining items on the siglist.
987  * These could be present on the siglist due to I_ESETSIG calls that
988  * use process groups or processed that do not have an open file descriptor
989  * for this stream (Such entries would not be removed by strclean).
990  */
991 static void
strcleanall(struct vnode * vp)992 strcleanall(struct vnode *vp)
993 {
994 	strsig_t *ssp, *nssp;
995 	stdata_t *stp;
996 
997 	stp = vp->v_stream;
998 	mutex_enter(&stp->sd_lock);
999 	ssp = stp->sd_siglist;
1000 	stp->sd_siglist = NULL;
1001 	while (ssp) {
1002 		nssp = ssp->ss_next;
1003 		mutex_enter(&pidlock);
1004 		PID_RELE(ssp->ss_pidp);
1005 		mutex_exit(&pidlock);
1006 		kmem_free(ssp, sizeof (strsig_t));
1007 		ssp = nssp;
1008 	}
1009 	stp->sd_sigflags = 0;
1010 	mutex_exit(&stp->sd_lock);
1011 }
1012 
1013 /*
1014  * Retrieve the next message from the logical stream head read queue
1015  * using either rwnext (if sync stream) or getq_noenab.
1016  * It is the caller's responsibility to call qbackenable after
1017  * it is finished with the message. The caller should not call
1018  * qbackenable until after any putback calls to avoid spurious backenabling.
1019  */
1020 mblk_t *
strget(struct stdata * stp,queue_t * q,struct uio * uiop,int first,int * errorp)1021 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
1022     int *errorp)
1023 {
1024 	mblk_t *bp;
1025 	int error;
1026 	ssize_t rbytes = 0;
1027 
1028 	/* Holding sd_lock prevents the read queue from changing  */
1029 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1030 
1031 	if (uiop != NULL && stp->sd_struiordq != NULL &&
1032 	    q->q_first == NULL &&
1033 	    (!first || (stp->sd_wakeq & RSLEEP))) {
1034 		/*
1035 		 * Stream supports rwnext() for the read side.
1036 		 * If this is the first time we're called by e.g. strread
1037 		 * only do the downcall if there is a deferred wakeup
1038 		 * (registered in sd_wakeq).
1039 		 */
1040 		struiod_t uiod;
1041 		struct iovec buf[IOV_MAX_STACK];
1042 		int iovlen = 0;
1043 
1044 		if (first)
1045 			stp->sd_wakeq &= ~RSLEEP;
1046 
1047 		if (uiop->uio_iovcnt > IOV_MAX_STACK) {
1048 			iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
1049 			uiod.d_iov = kmem_alloc(iovlen, KM_SLEEP);
1050 		} else {
1051 			uiod.d_iov = buf;
1052 		}
1053 
1054 		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
1055 		uiod.d_mp = 0;
1056 		/*
1057 		 * Mark that a thread is in rwnext on the read side
1058 		 * to prevent strrput from nacking ioctls immediately.
1059 		 * When the last concurrent rwnext returns
1060 		 * the ioctls are nack'ed.
1061 		 */
1062 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1063 		stp->sd_struiodnak++;
1064 		/*
1065 		 * Note: rwnext will drop sd_lock.
1066 		 */
1067 		error = rwnext(q, &uiod);
1068 		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
1069 		mutex_enter(&stp->sd_lock);
1070 		stp->sd_struiodnak--;
1071 		while (stp->sd_struiodnak == 0 &&
1072 		    ((bp = stp->sd_struionak) != NULL)) {
1073 			stp->sd_struionak = bp->b_next;
1074 			bp->b_next = NULL;
1075 			bp->b_datap->db_type = M_IOCNAK;
1076 			/*
1077 			 * Protect against the driver passing up
1078 			 * messages after it has done a qprocsoff.
1079 			 */
1080 			if (_OTHERQ(q)->q_next == NULL)
1081 				freemsg(bp);
1082 			else {
1083 				mutex_exit(&stp->sd_lock);
1084 				qreply(q, bp);
1085 				mutex_enter(&stp->sd_lock);
1086 			}
1087 		}
1088 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1089 		if (error == 0 || error == EWOULDBLOCK) {
1090 			if ((bp = uiod.d_mp) != NULL) {
1091 				*errorp = 0;
1092 				ASSERT(MUTEX_HELD(&stp->sd_lock));
1093 				if (iovlen != 0)
1094 					kmem_free(uiod.d_iov, iovlen);
1095 				return (bp);
1096 			}
1097 			error = 0;
1098 		} else if (error == EINVAL) {
1099 			/*
1100 			 * The stream plumbing must have
1101 			 * changed while we were away, so
1102 			 * just turn off rwnext()s.
1103 			 */
1104 			error = 0;
1105 		} else if (error == EBUSY) {
1106 			/*
1107 			 * The module might have data in transit using putnext
1108 			 * Fall back on waiting + getq.
1109 			 */
1110 			error = 0;
1111 		} else {
1112 			*errorp = error;
1113 			ASSERT(MUTEX_HELD(&stp->sd_lock));
1114 			if (iovlen != 0)
1115 				kmem_free(uiod.d_iov, iovlen);
1116 			return (NULL);
1117 		}
1118 
1119 		if (iovlen != 0)
1120 			kmem_free(uiod.d_iov, iovlen);
1121 
1122 		/*
1123 		 * Try a getq in case a rwnext() generated mblk
1124 		 * has bubbled up via strrput().
1125 		 */
1126 	}
1127 	*errorp = 0;
1128 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1129 
1130 	/*
1131 	 * If we have a valid uio, try and use this as a guide for how
1132 	 * many bytes to retrieve from the queue via getq_noenab().
1133 	 * Doing this can avoid unneccesary counting of overlong
1134 	 * messages in putback(). We currently only do this for sockets
1135 	 * and only if there is no sd_rputdatafunc hook.
1136 	 *
1137 	 * The sd_rputdatafunc hook transforms the entire message
1138 	 * before any bytes in it can be given to a client. So, rbytes
1139 	 * must be 0 if there is a hook.
1140 	 */
1141 	if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
1142 	    (stp->sd_rputdatafunc == NULL))
1143 		rbytes = uiop->uio_resid;
1144 
1145 	return (getq_noenab(q, rbytes));
1146 }
1147 
1148 /*
1149  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1150  * If the message does not fit in the uio the remainder of it is returned;
1151  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
1152  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
1153  * the error code, the message is consumed, and NULL is returned.
1154  */
1155 static mblk_t *
struiocopyout(mblk_t * bp,struct uio * uiop,int * errorp)1156 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1157 {
1158 	int error;
1159 	ptrdiff_t n;
1160 	mblk_t *nbp;
1161 
1162 	ASSERT(bp->b_wptr >= bp->b_rptr);
1163 
1164 	do {
1165 		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1166 			ASSERT(n > 0);
1167 
1168 			error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1169 			if (error != 0) {
1170 				freemsg(bp);
1171 				*errorp = error;
1172 				return (NULL);
1173 			}
1174 		}
1175 
1176 		bp->b_rptr += n;
1177 		while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1178 			nbp = bp;
1179 			bp = bp->b_cont;
1180 			freeb(nbp);
1181 		}
1182 	} while (bp != NULL && uiop->uio_resid > 0);
1183 
1184 	*errorp = 0;
1185 	return (bp);
1186 }
1187 
1188 /*
1189  * Read a stream according to the mode flags in sd_flag:
1190  *
1191  * (default mode)		- Byte stream, msg boundaries are ignored
1192  * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
1193  *				any data remaining in msg
1194  * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
1195  *				any remaining data on head of read queue
1196  *
1197  * Consume readable messages on the front of the queue until
1198  * ttolwp(curthread)->lwp_count
1199  * is satisfied, the readable messages are exhausted, or a message
1200  * boundary is reached in a message mode.  If no data was read and
1201  * the stream was not opened with the NDELAY flag, block until data arrives.
1202  * Otherwise return the data read and update the count.
1203  *
1204  * In default mode a 0 length message signifies end-of-file and terminates
1205  * a read in progress.  The 0 length message is removed from the queue
1206  * only if it is the only message read (no data is read).
1207  *
1208  * An attempt to read an M_PROTO or M_PCPROTO message results in an
1209  * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
1210  * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
1211  * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
1212  * are unlinked from any M_DATA blocks in the message, the protos are
1213  * thrown away, and the data is read.
 *
 * `vp' identifies the stream (vp->v_stream), `uiop' describes the caller's
 * buffer and supplies the file mode used when waiting, and `crp' is only
 * handed to the TRACE macros.
 *
 * Returns 0, or the error produced by i_straccess(), strgeterr(), strget(),
 * strwaitq() or struiocopyout(), or EBADMSG as described above.  EBADMSG is
 * only reported when no data has been copied out yet.
1214  */
1215 /* ARGSUSED */
1216 int
strread(struct vnode * vp,struct uio * uiop,cred_t * crp)1217 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
1218 {
1219 	struct stdata *stp;
1220 	mblk_t *bp, *nbp;
1221 	queue_t *q;
1222 	int error = 0;
1223 	uint_t old_sd_flag;	/* sd_flag sampled at the top of each pass */
1224 	int first;		/* passed to strget(); cleared after a wait */
1225 	char rflg;		/* set once a data message is copied out */
1226 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
1227 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
1228 	short delim;		/* message carried MSGDELIM and STRDELIM is on */
1229 	unsigned char pri = 0;	/* band of the last message taken off q */
1230 	char waitflag;
1231 	unsigned char type;
1232 
1233 	TRACE_1(TR_FAC_STREAMS_FR,
1234 	    TR_STRREAD_ENTER, "strread:%p", vp);
1235 	ASSERT(vp->v_stream);
1236 	stp = vp->v_stream;
1237 
1238 	mutex_enter(&stp->sd_lock);
1239 
1240 	if ((error = i_straccess(stp, JCREAD)) != 0) {
1241 		mutex_exit(&stp->sd_lock);
1242 		return (error);
1243 	}
1244 
1245 	if (stp->sd_flag & (STRDERR|STPLEX)) {
1246 		error = strgeterr(stp, STRDERR|STPLEX, 0);
1247 		if (error != 0) {
1248 			mutex_exit(&stp->sd_lock);
1249 			return (error);
1250 		}
1251 	}
1252 
1253 	/*
1254 	 * Loop normally terminates when uiop->uio_resid == 0.  It is also
	 * left early on an error, on hangup/EOF with nothing queued, at the
	 * last mark or a delimiter, and at a message boundary in the
	 * RD_MSGDIS/RD_MSGNODIS modes.  sd_lock is held at the top of every
	 * pass.
1255 	 */
1256 	rflg = 0;
1257 	waitflag = READWAIT;
1258 	q = _RD(stp->sd_wrq);
1259 	for (;;) {
1260 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1261 		old_sd_flag = stp->sd_flag;
1262 		mark = 0;
1263 		delim = 0;
1264 		first = 1;
		/*
		 * Wait for a message.  Every exit from this inner loop via
		 * "goto oops" happens with sd_lock held.
		 */
1265 		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
1266 			int done = 0;
1267 
1268 			ASSERT(MUTEX_HELD(&stp->sd_lock));
1269 
1270 			if (error != 0)
1271 				goto oops;
1272 
1273 			if (stp->sd_flag & (STRHUP|STREOF)) {
1274 				goto oops;
1275 			}
			/*
			 * Data has already been copied out and the stream is
			 * not in delimited mode: return what we have instead
			 * of sleeping.
			 */
1276 			if (rflg && !(stp->sd_flag & STRDELIM)) {
1277 				goto oops;
1278 			}
1279 			/*
1280 			 * If a read(fd,buf,0) has been done, there is no
1281 			 * need to sleep. We always have zero bytes to
1282 			 * return.
1283 			 */
1284 			if (uiop->uio_resid == 0) {
1285 				goto oops;
1286 			}
1287 
1288 			qbackenable(q, 0);
1289 
1290 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
1291 			    "strread calls strwaitq:%p, %p, %p",
1292 			    vp, uiop, crp);
1293 			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
1294 			    uiop->uio_fmode, -1, &done)) != 0 || done) {
1295 				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
1296 				    "strread error or done:%p, %p, %p",
1297 				    vp, uiop, crp);
				/*
				 * With FNDELAY on the file and OLDNDELAY on
				 * the stream, EAGAIN is turned into a
				 * successful return.
				 */
1298 				if ((uiop->uio_fmode & FNDELAY) &&
1299 				    (stp->sd_flag & OLDNDELAY) &&
1300 				    (error == EAGAIN))
1301 					error = 0;
1302 				goto oops;
1303 			}
1304 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
1305 			    "strread awakes:%p, %p, %p", vp, uiop, crp);
			/* Repeat the access check after having slept. */
1306 			if ((error = i_straccess(stp, JCREAD)) != 0) {
1307 				goto oops;
1308 			}
1309 			first = 0;
1310 		}
1311 
1312 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1313 		ASSERT(bp);
1314 		pri = bp->b_band;
1315 		/*
1316 		 * Extract any mark information. If the message is not
1317 		 * completely consumed this information will be put in the mblk
1318 		 * that is putback.
1319 		 * If MSGMARKNEXT is set and the message is completely consumed
1320 		 * the STRATMARK flag will be set below. Likewise, if
1321 		 * MSGNOTMARKNEXT is set and the message is
1322 		 * completely consumed STRNOTATMARK will be set.
1323 		 *
1324 		 * For some unknown reason strread only breaks the read at the
1325 		 * last mark.
1326 		 */
1327 		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
1328 		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
1329 		    (MSGMARKNEXT|MSGNOTMARKNEXT));
1330 		if (mark != 0 && bp == stp->sd_mark) {
			/*
			 * This is the message recorded in sd_mark.  If data
			 * was already read, put it back and stop before it.
			 */
1331 			if (rflg) {
1332 				putback(stp, q, bp, pri);
1333 				goto oops;
1334 			}
1335 			mark |= _LASTMARK;
1336 			stp->sd_mark = NULL;
1337 		}
1338 		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
1339 			delim = 1;
		/*
		 * sd_lock is dropped here; each case below reacquires it
		 * where it is needed.
		 */
1340 		mutex_exit(&stp->sd_lock);
1341 
1342 		if (STREAM_NEEDSERVICE(stp))
1343 			stream_runservice(stp);
1344 
1345 		type = bp->b_datap->db_type;
1346 
1347 		switch (type) {
1348 
1349 		case M_DATA:
1350 ismdata:
			/*
			 * Also entered by goto from the M_PROTO/M_PCPROTO
			 * case when RD_PROTDAT or RD_PROTDIS applies.
			 */
1351 			if (msgnodata(bp)) {
1352 				if (mark || delim) {
1353 					freemsg(bp);
1354 				} else if (rflg) {
1355 
1356 					/*
1357 					 * If already read data put zero
1358 					 * length message back on queue else
1359 					 * free msg and return 0.
1360 					 */
1361 					bp->b_band = pri;
1362 					mutex_enter(&stp->sd_lock);
1363 					putback(stp, q, bp, pri);
1364 					mutex_exit(&stp->sd_lock);
1365 				} else {
1366 					freemsg(bp);
1367 				}
1368 				error =  0;
1369 				goto oops1;
1370 			}
1371 
1372 			rflg = 1;
			/*
			 * From now on any wait uses NOINTR as well.
			 * NOTE(review): presumably so that a wait cannot be
			 * interrupted once data has been transferred --
			 * confirm against strwaitq().
			 */
1373 			waitflag |= NOINTR;
			/* struiocopyout() frees the message on error. */
1374 			bp = struiocopyout(bp, uiop, &error);
1375 			if (error != 0)
1376 				goto oops1;
1377 
1378 			mutex_enter(&stp->sd_lock);
1379 			if (bp) {
1380 				/*
1381 				 * Have remaining data in message.
1382 				 * Free msg if in discard mode.
1383 				 */
1384 				if (stp->sd_read_opt & RD_MSGDIS) {
1385 					freemsg(bp);
1386 				} else {
					/*
					 * Restore the band, mark and
					 * delimiter state on the remainder
					 * before it goes back on the queue.
					 */
1387 					bp->b_band = pri;
1388 					if ((mark & _LASTMARK) &&
1389 					    (stp->sd_mark == NULL))
1390 						stp->sd_mark = bp;
1391 					bp->b_flag |= mark & ~_LASTMARK;
1392 					if (delim)
1393 						bp->b_flag |= MSGDELIM;
1394 					if (msgnodata(bp))
1395 						freemsg(bp);
1396 					else
1397 						putback(stp, q, bp, pri);
1398 				}
1399 			} else {
1400 				/*
1401 				 * Consumed the complete message.
1402 				 * Move the MSG*MARKNEXT information
1403 				 * to the stream head just in case
1404 				 * the read queue becomes empty.
1405 				 *
1406 				 * If the stream head was at the mark
1407 				 * (STRATMARK) before we dropped sd_lock above
1408 				 * and some data was consumed then we have
1409 				 * moved past the mark thus STRATMARK is
1410 				 * cleared. However, if a message arrived in
1411 				 * strrput during the copyout above causing
1412 				 * STRATMARK to be set we can not clear that
1413 				 * flag.
1414 				 */
1415 				if (mark &
1416 				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
1417 					if (mark & MSGMARKNEXT) {
1418 						stp->sd_flag &= ~STRNOTATMARK;
1419 						stp->sd_flag |= STRATMARK;
1420 					} else if (mark & MSGNOTMARKNEXT) {
1421 						stp->sd_flag &= ~STRATMARK;
1422 						stp->sd_flag |= STRNOTATMARK;
1423 					} else {
1424 						stp->sd_flag &=
1425 						    ~(STRATMARK|STRNOTATMARK);
1426 					}
1427 				} else if (rflg && (old_sd_flag & STRATMARK)) {
1428 					stp->sd_flag &= ~STRATMARK;
1429 				}
1430 			}
1431 
1432 			/*
1433 			 * Check for signal messages at the front of the read
1434 			 * queue and generate the signal(s) if appropriate.
1435 			 * The only signal that can be on queue is M_SIG at
1436 			 * this point.
1437 			 */
1438 			while ((((bp = q->q_first)) != NULL) &&
1439 			    (bp->b_datap->db_type == M_SIG)) {
1440 				bp = getq_noenab(q, 0);
1441 				/*
1442 				 * sd_lock is held so the content of the
1443 				 * read queue can not change.
1444 				 */
1445 				ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
1446 				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
1447 				mutex_exit(&stp->sd_lock);
1448 				freemsg(bp);
1449 				if (STREAM_NEEDSERVICE(stp))
1450 					stream_runservice(stp);
1451 				mutex_enter(&stp->sd_lock);
1452 			}
1453 
			/* Stop here with sd_lock held, or go round again. */
1454 			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
1455 			    delim ||
1456 			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
1457 				goto oops;
1458 			}
1459 			continue;
1460 
1461 		case M_SIG:
			/* Post the signal, then retake sd_lock for next pass. */
1462 			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
1463 			freemsg(bp);
1464 			mutex_enter(&stp->sd_lock);
1465 			continue;
1466 
1467 		case M_PROTO:
1468 		case M_PCPROTO:
1469 			/*
1470 			 * Only data messages are readable.
1471 			 * Any others generate an error, unless
1472 			 * RD_PROTDIS or RD_PROTDAT is set.
1473 			 */
1474 			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * Retype leading proto blocks as M_DATA.
				 * Note that this walk follows b_next.
				 */
1475 				for (nbp = bp; nbp; nbp = nbp->b_next) {
1476 					if ((nbp->b_datap->db_type ==
1477 					    M_PROTO) ||
1478 					    (nbp->b_datap->db_type ==
1479 					    M_PCPROTO)) {
1480 						nbp->b_datap->db_type = M_DATA;
1481 					} else {
1482 						break;
1483 					}
1484 				}
1485 				/*
1486 				 * clear stream head hi pri flag based on
1487 				 * first message
1488 				 */
1489 				if (type == M_PCPROTO) {
1490 					mutex_enter(&stp->sd_lock);
1491 					stp->sd_flag &= ~STRPRI;
1492 					mutex_exit(&stp->sd_lock);
1493 				}
1494 				goto ismdata;
1495 			} else if (stp->sd_read_opt & RD_PROTDIS) {
1496 				/*
1497 				 * discard non-data messages
1498 				 */
1499 				while (bp &&
1500 				    ((bp->b_datap->db_type == M_PROTO) ||
1501 				    (bp->b_datap->db_type == M_PCPROTO))) {
1502 					nbp = unlinkb(bp);
1503 					freeb(bp);
1504 					bp = nbp;
1505 				}
1506 				/*
1507 				 * clear stream head hi pri flag based on
1508 				 * first message
1509 				 */
1510 				if (type == M_PCPROTO) {
1511 					mutex_enter(&stp->sd_lock);
1512 					stp->sd_flag &= ~STRPRI;
1513 					mutex_exit(&stp->sd_lock);
1514 				}
				/*
				 * Read whatever data blocks are left; if the
				 * message held only protos, go get the next
				 * message.
				 */
1515 				if (bp) {
1516 					bp->b_band = pri;
1517 					goto ismdata;
1518 				} else {
1519 					break;
1520 				}
1521 			}
1522 			/* FALLTHRU */
1523 		case M_PASSFP:
1524 			if ((bp->b_datap->db_type == M_PASSFP) &&
1525 			    (stp->sd_read_opt & RD_PROTDIS)) {
1526 				freemsg(bp);
1527 				break;
1528 			}
			/*
			 * Not readable: leave the message on the queue and
			 * report EBADMSG unless data was already read.
			 */
1529 			mutex_enter(&stp->sd_lock);
1530 			putback(stp, q, bp, pri);
1531 			mutex_exit(&stp->sd_lock);
1532 			if (rflg == 0)
1533 				error = EBADMSG;
1534 			goto oops1;
1535 
1536 		default:
1537 			/*
1538 			 * Garbage on stream head read queue.
			 * The message is freed and the read ends via oops1
			 * without an error being assigned here.
1539 			 */
1540 			cmn_err(CE_WARN, "bad %x found at stream head\n",
1541 			    bp->b_datap->db_type);
1542 			freemsg(bp);
1543 			goto oops1;
1544 		}
		/* Reached by "break" above: retake sd_lock for next pass. */
1545 		mutex_enter(&stp->sd_lock);
1546 	}
	/* oops: exit with sd_lock held.  oops1: sd_lock already dropped. */
1547 oops:
1548 	mutex_exit(&stp->sd_lock);
1549 oops1:
1550 	qbackenable(q, pri);
1551 	return (error);
1552 #undef	_LASTMARK
1553 }
1554 
1555 /*
1556  * Default processing of M_PROTO/M_PCPROTO messages.
1557  * Determine which wakeups and signals are needed.
1558  * This can be replaced by a user-specified procedure for kernel users
1559  * of STREAMS.
1560  */
1561 /* ARGSUSED */
1562 mblk_t *
strrput_proto(vnode_t * vp,mblk_t * mp,strwakeup_t * wakeups,strsigset_t * firstmsgsigs,strsigset_t * allmsgsigs,strpollset_t * pollwakeups)1563 strrput_proto(vnode_t *vp, mblk_t *mp,
1564     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1565     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1566 {
1567 	*wakeups = RSLEEP;
1568 	*allmsgsigs = 0;
1569 
1570 	switch (mp->b_datap->db_type) {
1571 	case M_PROTO:
1572 		if (mp->b_band == 0) {
1573 			*firstmsgsigs = S_INPUT | S_RDNORM;
1574 			*pollwakeups = POLLIN | POLLRDNORM;
1575 		} else {
1576 			*firstmsgsigs = S_INPUT | S_RDBAND;
1577 			*pollwakeups = POLLIN | POLLRDBAND;
1578 		}
1579 		break;
1580 	case M_PCPROTO:
1581 		*firstmsgsigs = S_HIPRI;
1582 		*pollwakeups = POLLPRI;
1583 		break;
1584 	}
1585 	return (mp);
1586 }
1587 
1588 /*
1589  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1590  * M_PASSFP messages.
1591  * Determine which wakeups and signals are needed.
1592  * This can be replaced by a user-specified procedure for kernel users
1593  * of STREAMS.
1594  */
1595 /* ARGSUSED */
1596 mblk_t *
strrput_misc(vnode_t * vp,mblk_t * mp,strwakeup_t * wakeups,strsigset_t * firstmsgsigs,strsigset_t * allmsgsigs,strpollset_t * pollwakeups)1597 strrput_misc(vnode_t *vp, mblk_t *mp,
1598     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1599     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1600 {
1601 	*wakeups = 0;
1602 	*firstmsgsigs = 0;
1603 	*allmsgsigs = 0;
1604 	*pollwakeups = 0;
1605 	return (mp);
1606 }
1607 
1608 /*
1609  * Stream read put procedure.  Called from downstream driver/module
1610  * with messages for the stream head.  Data, protocol, and in-stream
1611  * signal messages are placed on the queue, others are handled directly.
1612  */
1613 int
strrput(queue_t * q,mblk_t * bp)1614 strrput(queue_t *q, mblk_t *bp)
1615 {
1616 	struct stdata	*stp;
1617 	ulong_t		rput_opt;
1618 	strwakeup_t	wakeups;
1619 	strsigset_t	firstmsgsigs;	/* Signals if first message on queue */
1620 	strsigset_t	allmsgsigs;	/* Signals for all messages */
1621 	strsigset_t	signals;	/* Signals events to generate */
1622 	strpollset_t	pollwakeups;
1623 	mblk_t		*nextbp;
1624 	uchar_t		band = 0;
1625 	int		hipri_sig;
1626 
1627 	stp = (struct stdata *)q->q_ptr;
1628 	/*
1629 	 * Use rput_opt for optimized access to the SR_ flags except
1630 	 * SR_POLLIN. That flag has to be checked under sd_lock since it
1631 	 * is modified by strpoll().
1632 	 */
1633 	rput_opt = stp->sd_rput_opt;
1634 
1635 	ASSERT(qclaimed(q));
1636 	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
1637 	    "strrput called with message type:q %p bp %p", q, bp);
1638 
1639 	/*
1640 	 * Perform initial processing and pass to the parameterized functions.
1641 	 */
1642 	ASSERT(bp->b_next == NULL);
1643 
1644 	switch (bp->b_datap->db_type) {
1645 	case M_DATA:
1646 		/*
1647 		 * sockfs is the only consumer of STREOF and when it is set,
1648 		 * it implies that the receiver is not interested in receiving
1649 		 * any more data, hence the mblk is freed to prevent unnecessary
1650 		 * message queueing at the stream head.
1651 		 */
1652 		if (stp->sd_flag == STREOF) {
1653 			freemsg(bp);
1654 			return (0);
1655 		}
1656 		if ((rput_opt & SR_IGN_ZEROLEN) &&
1657 		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
1658 			/*
1659 			 * Ignore zero-length M_DATA messages. These might be
1660 			 * generated by some transports.
1661 			 * The zero-length M_DATA messages, even if they
1662 			 * are ignored, should effect the atmark tracking and
1663 			 * should wake up a thread sleeping in strwaitmark.
1664 			 */
1665 			mutex_enter(&stp->sd_lock);
1666 			if (bp->b_flag & MSGMARKNEXT) {
1667 				/*
1668 				 * Record the position of the mark either
1669 				 * in q_last or in STRATMARK.
1670 				 */
1671 				if (q->q_last != NULL) {
1672 					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
1673 					q->q_last->b_flag |= MSGMARKNEXT;
1674 				} else {
1675 					stp->sd_flag &= ~STRNOTATMARK;
1676 					stp->sd_flag |= STRATMARK;
1677 				}
1678 			} else if (bp->b_flag & MSGNOTMARKNEXT) {
1679 				/*
1680 				 * Record that this is not the position of
1681 				 * the mark either in q_last or in
1682 				 * STRNOTATMARK.
1683 				 */
1684 				if (q->q_last != NULL) {
1685 					q->q_last->b_flag &= ~MSGMARKNEXT;
1686 					q->q_last->b_flag |= MSGNOTMARKNEXT;
1687 				} else {
1688 					stp->sd_flag &= ~STRATMARK;
1689 					stp->sd_flag |= STRNOTATMARK;
1690 				}
1691 			}
1692 			if (stp->sd_flag & RSLEEP) {
1693 				stp->sd_flag &= ~RSLEEP;
1694 				cv_broadcast(&q->q_wait);
1695 			}
1696 			mutex_exit(&stp->sd_lock);
1697 			freemsg(bp);
1698 			return (0);
1699 		}
1700 		wakeups = RSLEEP;
1701 		if (bp->b_band == 0) {
1702 			firstmsgsigs = S_INPUT | S_RDNORM;
1703 			pollwakeups = POLLIN | POLLRDNORM;
1704 		} else {
1705 			firstmsgsigs = S_INPUT | S_RDBAND;
1706 			pollwakeups = POLLIN | POLLRDBAND;
1707 		}
1708 		if (rput_opt & SR_SIGALLDATA)
1709 			allmsgsigs = firstmsgsigs;
1710 		else
1711 			allmsgsigs = 0;
1712 
1713 		mutex_enter(&stp->sd_lock);
1714 		if ((rput_opt & SR_CONSOL_DATA) &&
1715 		    (q->q_last != NULL) &&
1716 		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
1717 			/*
1718 			 * Consolidate an M_DATA message onto an M_DATA,
1719 			 * M_PROTO, or M_PCPROTO by merging it with q_last.
1720 			 * The consolidation does not take place if
1721 			 * the old message is marked with either of the
1722 			 * marks or the delim flag or if the new
1723 			 * message is marked with MSGMARK. The MSGMARK
1724 			 * check is needed to handle the odd semantics of
1725 			 * MSGMARK where essentially the whole message
1726 			 * is to be treated as marked.
1727 			 * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
1728 			 * new message to the front of the b_cont chain.
1729 			 */
1730 			mblk_t *lbp = q->q_last;
1731 			unsigned char db_type = lbp->b_datap->db_type;
1732 
1733 			if ((db_type == M_DATA || db_type == M_PROTO ||
1734 			    db_type == M_PCPROTO) &&
1735 			    !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
1736 				rmvq_noenab(q, lbp);
1737 				/*
1738 				 * The first message in the b_cont list
1739 				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
1740 				 * We need to handle the case where we
1741 				 * are appending:
1742 				 *
1743 				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
1744 				 * 2) a MSGMARKNEXT to a plain message.
1745 				 * 3) a MSGNOTMARKNEXT to a plain message
1746 				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
1747 				 *    message.
1748 				 *
1749 				 * Thus we never append a MSGMARKNEXT or
1750 				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
1751 				 */
1752 				if (bp->b_flag & MSGMARKNEXT) {
1753 					lbp->b_flag |= MSGMARKNEXT;
1754 					lbp->b_flag &= ~MSGNOTMARKNEXT;
1755 					bp->b_flag &= ~MSGMARKNEXT;
1756 				} else if (bp->b_flag & MSGNOTMARKNEXT) {
1757 					lbp->b_flag |= MSGNOTMARKNEXT;
1758 					bp->b_flag &= ~MSGNOTMARKNEXT;
1759 				}
1760 
1761 				linkb(lbp, bp);
1762 				bp = lbp;
1763 				/*
1764 				 * The new message logically isn't the first
1765 				 * even though the q_first check below thinks
1766 				 * it is. Clear the firstmsgsigs to make it
1767 				 * not appear to be first.
1768 				 */
1769 				firstmsgsigs = 0;
1770 			}
1771 		}
1772 		break;
1773 
1774 	case M_PASSFP:
1775 		wakeups = RSLEEP;
1776 		allmsgsigs = 0;
1777 		if (bp->b_band == 0) {
1778 			firstmsgsigs = S_INPUT | S_RDNORM;
1779 			pollwakeups = POLLIN | POLLRDNORM;
1780 		} else {
1781 			firstmsgsigs = S_INPUT | S_RDBAND;
1782 			pollwakeups = POLLIN | POLLRDBAND;
1783 		}
1784 		mutex_enter(&stp->sd_lock);
1785 		break;
1786 
1787 	case M_PROTO:
1788 	case M_PCPROTO:
1789 		ASSERT(stp->sd_rprotofunc != NULL);
1790 		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
1791 		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1792 #define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
1793 		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
1794 #define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
1795 		POLLWRBAND)
1796 
1797 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1798 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1799 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1800 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1801 
1802 		mutex_enter(&stp->sd_lock);
1803 		break;
1804 
1805 	default:
1806 		ASSERT(stp->sd_rmiscfunc != NULL);
1807 		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
1808 		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1809 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1810 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1811 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1812 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1813 #undef	ALLSIG
1814 #undef	ALLPOLL
1815 		mutex_enter(&stp->sd_lock);
1816 		break;
1817 	}
1818 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1819 
1820 	/* By default generate superset of signals */
1821 	signals = (firstmsgsigs | allmsgsigs);
1822 
1823 	/*
1824 	 * The  proto and misc functions can return multiple messages
1825 	 * as a b_next chain. Such messages are processed separately.
1826 	 */
1827 one_more:
1828 	hipri_sig = 0;
1829 	if (bp == NULL) {
1830 		nextbp = NULL;
1831 	} else {
1832 		nextbp = bp->b_next;
1833 		bp->b_next = NULL;
1834 
1835 		switch (bp->b_datap->db_type) {
1836 		case M_PCPROTO:
1837 			/*
1838 			 * Only one priority protocol message is allowed at the
1839 			 * stream head at a time.
1840 			 */
1841 			if (stp->sd_flag & STRPRI) {
1842 				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
1843 				    "M_PCPROTO already at head");
1844 				freemsg(bp);
1845 				mutex_exit(&stp->sd_lock);
1846 				goto done;
1847 			}
1848 			stp->sd_flag |= STRPRI;
1849 			hipri_sig = 1;
1850 			/* FALLTHRU */
1851 		case M_DATA:
1852 		case M_PROTO:
1853 		case M_PASSFP:
1854 			band = bp->b_band;
1855 			/*
1856 			 * Marking doesn't work well when messages
1857 			 * are marked in more than one band.  We only
1858 			 * remember the last message received, even if
1859 			 * it is placed on the queue ahead of other
1860 			 * marked messages.
1861 			 */
1862 			if (bp->b_flag & MSGMARK)
1863 				stp->sd_mark = bp;
1864 			(void) putq(q, bp);
1865 
1866 			/*
1867 			 * If message is a PCPROTO message, always use
1868 			 * firstmsgsigs to determine if a signal should be
1869 			 * sent as strrput is the only place to send
1870 			 * signals for PCPROTO. Other messages are based on
1871 			 * the STRGETINPROG flag. The flag determines if
1872 			 * strrput or (k)strgetmsg will be responsible for
1873 			 * sending the signals, in the firstmsgsigs case.
1874 			 */
1875 			if ((hipri_sig == 1) ||
1876 			    (((stp->sd_flag & STRGETINPROG) == 0) &&
1877 			    (q->q_first == bp)))
1878 				signals = (firstmsgsigs | allmsgsigs);
1879 			else
1880 				signals = allmsgsigs;
1881 			break;
1882 
1883 		default:
1884 			mutex_exit(&stp->sd_lock);
1885 			(void) strrput_nondata(q, bp);
1886 			mutex_enter(&stp->sd_lock);
1887 			break;
1888 		}
1889 	}
1890 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1891 	/*
1892 	 * Wake sleeping read/getmsg and cancel deferred wakeup
1893 	 */
1894 	if (wakeups & RSLEEP)
1895 		stp->sd_wakeq &= ~RSLEEP;
1896 
1897 	wakeups &= stp->sd_flag;
1898 	if (wakeups & RSLEEP) {
1899 		stp->sd_flag &= ~RSLEEP;
1900 		cv_broadcast(&q->q_wait);
1901 	}
1902 	if (wakeups & WSLEEP) {
1903 		stp->sd_flag &= ~WSLEEP;
1904 		cv_broadcast(&_WR(q)->q_wait);
1905 	}
1906 
1907 	if (pollwakeups != 0) {
1908 		if (pollwakeups == (POLLIN | POLLRDNORM)) {
1909 			/*
1910 			 * Can't use rput_opt since it was not
1911 			 * read when sd_lock was held and SR_POLLIN is changed
1912 			 * by strpoll() under sd_lock.
1913 			 */
1914 			if (!(stp->sd_rput_opt & SR_POLLIN))
1915 				goto no_pollwake;
1916 			stp->sd_rput_opt &= ~SR_POLLIN;
1917 		}
1918 		mutex_exit(&stp->sd_lock);
1919 		pollwakeup(&stp->sd_pollist, pollwakeups);
1920 		mutex_enter(&stp->sd_lock);
1921 	}
1922 no_pollwake:
1923 
1924 	/*
1925 	 * strsendsig can handle multiple signals with a
1926 	 * single call.
1927 	 */
1928 	if (stp->sd_sigflags & signals)
1929 		strsendsig(stp->sd_siglist, signals, band, 0);
1930 	mutex_exit(&stp->sd_lock);
1931 
1932 
1933 done:
1934 	if (nextbp == NULL)
1935 		return (0);
1936 
1937 	/*
1938 	 * Any signals were handled the first time.
1939 	 * Wakeups and pollwakeups are redone to avoid any race
1940 	 * conditions - all the messages are not queued until the
1941 	 * last message has been processed by strrput.
1942 	 */
1943 	bp = nextbp;
1944 	signals = firstmsgsigs = allmsgsigs = 0;
1945 	mutex_enter(&stp->sd_lock);
1946 	goto one_more;
1947 }
1948 
1949 static void
log_dupioc(queue_t * rq,mblk_t * bp)1950 log_dupioc(queue_t *rq, mblk_t *bp)
1951 {
1952 	queue_t *wq, *qp;
1953 	char *modnames, *mnp, *dname;
1954 	size_t maxmodstr;
1955 	boolean_t islast;
1956 
1957 	/*
1958 	 * Allocate a buffer large enough to hold the names of nstrpush modules
1959 	 * and one driver, with spaces between and NUL terminator.  If we can't
1960 	 * get memory, then we'll just log the driver name.
1961 	 */
1962 	maxmodstr = nstrpush * (FMNAMESZ + 1);
1963 	mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1964 
1965 	/* march down write side to print log message down to the driver */
1966 	wq = WR(rq);
1967 
1968 	/* make sure q_next doesn't shift around while we're grabbing data */
1969 	claimstr(wq);
1970 	qp = wq->q_next;
1971 	do {
1972 		dname = Q2NAME(qp);
1973 		islast = !SAMESTR(qp) || qp->q_next == NULL;
1974 		if (modnames == NULL) {
1975 			/*
1976 			 * If we don't have memory, then get the driver name in
1977 			 * the log where we can see it.  Note that memory
1978 			 * pressure is a possible cause of these sorts of bugs.
1979 			 */
1980 			if (islast) {
1981 				modnames = dname;
1982 				maxmodstr = 0;
1983 			}
1984 		} else {
1985 			mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1986 			if (!islast)
1987 				*mnp++ = ' ';
1988 		}
1989 		qp = qp->q_next;
1990 	} while (!islast);
1991 	releasestr(wq);
1992 	/* Cannot happen unless stream head is corrupt. */
1993 	ASSERT(modnames != NULL);
1994 	(void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1995 	    SL_CONSOLE|SL_TRACE|SL_ERROR,
1996 	    "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1997 	    rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1998 	    (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1999 	if (maxmodstr != 0)
2000 		kmem_free(modnames, maxmodstr);
2001 }
2002 
2003 int
strrput_nondata(queue_t * q,mblk_t * bp)2004 strrput_nondata(queue_t *q, mblk_t *bp)
2005 {
2006 	struct stdata *stp;
2007 	struct iocblk *iocbp;
2008 	struct stroptions *sop;
2009 	struct copyreq *reqp;
2010 	struct copyresp *resp;
2011 	unsigned char bpri;
2012 	unsigned char  flushed_already = 0;
2013 
2014 	stp = (struct stdata *)q->q_ptr;
2015 
2016 	ASSERT(!(stp->sd_flag & STPLEX));
2017 	ASSERT(qclaimed(q));
2018 
2019 	switch (bp->b_datap->db_type) {
2020 	case M_ERROR:
2021 		/*
2022 		 * An error has occurred downstream, the errno is in the first
2023 		 * bytes of the message.
2024 		 */
2025 		if ((bp->b_wptr - bp->b_rptr) == 2) {	/* New flavor */
2026 			unsigned char rw = 0;
2027 
2028 			mutex_enter(&stp->sd_lock);
2029 			if (*bp->b_rptr != NOERROR) {	/* read error */
2030 				if (*bp->b_rptr != 0) {
2031 					if (stp->sd_flag & STRDERR)
2032 						flushed_already |= FLUSHR;
2033 					stp->sd_flag |= STRDERR;
2034 					rw |= FLUSHR;
2035 				} else {
2036 					stp->sd_flag &= ~STRDERR;
2037 				}
2038 				stp->sd_rerror = *bp->b_rptr;
2039 			}
2040 			bp->b_rptr++;
2041 			if (*bp->b_rptr != NOERROR) {	/* write error */
2042 				if (*bp->b_rptr != 0) {
2043 					if (stp->sd_flag & STWRERR)
2044 						flushed_already |= FLUSHW;
2045 					stp->sd_flag |= STWRERR;
2046 					rw |= FLUSHW;
2047 				} else {
2048 					stp->sd_flag &= ~STWRERR;
2049 				}
2050 				stp->sd_werror = *bp->b_rptr;
2051 			}
2052 			if (rw) {
2053 				TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
2054 				    "strrput cv_broadcast:q %p, bp %p",
2055 				    q, bp);
2056 				cv_broadcast(&q->q_wait); /* readers */
2057 				cv_broadcast(&_WR(q)->q_wait); /* writers */
2058 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
2059 
2060 				mutex_exit(&stp->sd_lock);
2061 				pollwakeup(&stp->sd_pollist, POLLERR);
2062 				mutex_enter(&stp->sd_lock);
2063 
2064 				if (stp->sd_sigflags & S_ERROR)
2065 					strsendsig(stp->sd_siglist, S_ERROR, 0,
2066 					    ((rw & FLUSHR) ? stp->sd_rerror :
2067 					    stp->sd_werror));
2068 				mutex_exit(&stp->sd_lock);
2069 				/*
2070 				 * Send the M_FLUSH only
2071 				 * for the first M_ERROR
2072 				 * message on the stream
2073 				 */
2074 				if (flushed_already == rw) {
2075 					freemsg(bp);
2076 					return (0);
2077 				}
2078 
2079 				bp->b_datap->db_type = M_FLUSH;
2080 				*bp->b_rptr = rw;
2081 				bp->b_wptr = bp->b_rptr + 1;
2082 				/*
2083 				 * Protect against the driver
2084 				 * passing up messages after
2085 				 * it has done a qprocsoff
2086 				 */
2087 				if (_OTHERQ(q)->q_next == NULL)
2088 					freemsg(bp);
2089 				else
2090 					qreply(q, bp);
2091 				return (0);
2092 			} else
2093 				mutex_exit(&stp->sd_lock);
2094 		} else if (*bp->b_rptr != 0) {		/* Old flavor */
2095 				if (stp->sd_flag & (STRDERR|STWRERR))
2096 					flushed_already = FLUSHRW;
2097 				mutex_enter(&stp->sd_lock);
2098 				stp->sd_flag |= (STRDERR|STWRERR);
2099 				stp->sd_rerror = *bp->b_rptr;
2100 				stp->sd_werror = *bp->b_rptr;
2101 				TRACE_2(TR_FAC_STREAMS_FR,
2102 				    TR_STRRPUT_WAKE2,
2103 				    "strrput wakeup #2:q %p, bp %p", q, bp);
2104 				cv_broadcast(&q->q_wait); /* the readers */
2105 				cv_broadcast(&_WR(q)->q_wait); /* the writers */
2106 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
2107 
2108 				mutex_exit(&stp->sd_lock);
2109 				pollwakeup(&stp->sd_pollist, POLLERR);
2110 				mutex_enter(&stp->sd_lock);
2111 
2112 				if (stp->sd_sigflags & S_ERROR)
2113 					strsendsig(stp->sd_siglist, S_ERROR, 0,
2114 					    (stp->sd_werror ? stp->sd_werror :
2115 					    stp->sd_rerror));
2116 				mutex_exit(&stp->sd_lock);
2117 
2118 				/*
2119 				 * Send the M_FLUSH only
2120 				 * for the first M_ERROR
2121 				 * message on the stream
2122 				 */
2123 				if (flushed_already != FLUSHRW) {
2124 					bp->b_datap->db_type = M_FLUSH;
2125 					*bp->b_rptr = FLUSHRW;
2126 					/*
2127 					 * Protect against the driver passing up
2128 					 * messages after it has done a
2129 					 * qprocsoff.
2130 					 */
2131 					if (_OTHERQ(q)->q_next == NULL)
2132 						freemsg(bp);
2133 					else
2134 						qreply(q, bp);
2135 					return (0);
2136 				}
2137 		}
2138 		freemsg(bp);
2139 		return (0);
2140 
2141 	case M_HANGUP:
2142 
2143 		freemsg(bp);
2144 		mutex_enter(&stp->sd_lock);
2145 		stp->sd_werror = ENXIO;
2146 		stp->sd_flag |= STRHUP;
2147 		stp->sd_flag &= ~(WSLEEP|RSLEEP);
2148 
2149 		/*
2150 		 * send signal if controlling tty
2151 		 */
2152 
2153 		if (stp->sd_sidp) {
2154 			prsignal(stp->sd_sidp, SIGHUP);
2155 			if (stp->sd_sidp != stp->sd_pgidp)
2156 				pgsignal(stp->sd_pgidp, SIGTSTP);
2157 		}
2158 
2159 		/*
2160 		 * wake up read, write, and exception pollers and
2161 		 * reset wakeup mechanism.
2162 		 */
2163 		cv_broadcast(&q->q_wait);	/* the readers */
2164 		cv_broadcast(&_WR(q)->q_wait);	/* the writers */
2165 		cv_broadcast(&stp->sd_monitor);	/* the ioctllers */
2166 		strhup(stp);
2167 		mutex_exit(&stp->sd_lock);
2168 		return (0);
2169 
2170 	case M_UNHANGUP:
2171 		freemsg(bp);
2172 		mutex_enter(&stp->sd_lock);
2173 		stp->sd_werror = 0;
2174 		stp->sd_flag &= ~STRHUP;
2175 		mutex_exit(&stp->sd_lock);
2176 		return (0);
2177 
2178 	case M_SIG:
2179 		/*
2180 		 * Someone downstream wants to post a signal.  The
2181 		 * signal to post is contained in the first byte of the
2182 		 * message.  If the message would go on the front of
2183 		 * the queue, send a signal to the process group
2184 		 * (if not SIGPOLL) or to the siglist processes
2185 		 * (SIGPOLL).  If something is already on the queue,
2186 		 * OR if we are delivering a delayed suspend (*sigh*
2187 		 * another "tty" hack) and there's no one sleeping already,
2188 		 * just enqueue the message.
2189 		 */
2190 		mutex_enter(&stp->sd_lock);
2191 		if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2192 		    !(stp->sd_flag & RSLEEP))) {
2193 			(void) putq(q, bp);
2194 			mutex_exit(&stp->sd_lock);
2195 			return (0);
2196 		}
2197 		mutex_exit(&stp->sd_lock);
2198 		/* FALLTHRU */
2199 
2200 	case M_PCSIG:
2201 		/*
2202 		 * Don't enqueue, just post the signal.
2203 		 */
2204 		strsignal(stp, *bp->b_rptr, 0L);
2205 		freemsg(bp);
2206 		return (0);
2207 
2208 	case M_CMD:
2209 		if (MBLKL(bp) != sizeof (cmdblk_t)) {
2210 			freemsg(bp);
2211 			return (0);
2212 		}
2213 
2214 		mutex_enter(&stp->sd_lock);
2215 		if (stp->sd_flag & STRCMDWAIT) {
2216 			ASSERT(stp->sd_cmdblk == NULL);
2217 			stp->sd_cmdblk = bp;
2218 			cv_broadcast(&stp->sd_monitor);
2219 			mutex_exit(&stp->sd_lock);
2220 		} else {
2221 			mutex_exit(&stp->sd_lock);
2222 			freemsg(bp);
2223 		}
2224 		return (0);
2225 
2226 	case M_FLUSH:
2227 		/*
2228 		 * Flush queues.  The indication of which queues to flush
2229 		 * is in the first byte of the message.  If the read queue
2230 		 * is specified, then flush it.  If FLUSHBAND is set, just
2231 		 * flush the band specified by the second byte of the message.
2232 		 *
2233 		 * If a module has issued a M_SETOPT to not flush hi
2234 		 * priority messages off of the stream head, then pass this
2235 		 * flag into the flushq code to preserve such messages.
2236 		 */
2237 
2238 		if (*bp->b_rptr & FLUSHR) {
2239 			mutex_enter(&stp->sd_lock);
2240 			if (*bp->b_rptr & FLUSHBAND) {
2241 				ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2242 				flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2243 			} else
2244 				flushq_common(q, FLUSHALL,
2245 				    stp->sd_read_opt & RFLUSHPCPROT);
2246 			if ((q->q_first == NULL) ||
2247 			    (q->q_first->b_datap->db_type < QPCTL))
2248 				stp->sd_flag &= ~STRPRI;
2249 			else {
2250 				ASSERT(stp->sd_flag & STRPRI);
2251 			}
2252 			mutex_exit(&stp->sd_lock);
2253 		}
2254 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2255 			*bp->b_rptr &= ~FLUSHR;
2256 			bp->b_flag |= MSGNOLOOP;
2257 			/*
2258 			 * Protect against the driver passing up
2259 			 * messages after it has done a qprocsoff.
2260 			 */
2261 			if (_OTHERQ(q)->q_next == NULL)
2262 				freemsg(bp);
2263 			else
2264 				qreply(q, bp);
2265 			return (0);
2266 		}
2267 		freemsg(bp);
2268 		return (0);
2269 
2270 	case M_IOCACK:
2271 	case M_IOCNAK:
2272 		iocbp = (struct iocblk *)bp->b_rptr;
2273 		/*
2274 		 * If not waiting for ACK or NAK then just free msg.
2275 		 * If incorrect id sequence number then just free msg.
2276 		 * If already have ACK or NAK for user then this is a
2277 		 *    duplicate, display a warning and free the msg.
2278 		 */
2279 		mutex_enter(&stp->sd_lock);
2280 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2281 		    (stp->sd_iocid != iocbp->ioc_id)) {
2282 			/*
2283 			 * If the ACK/NAK is a dup, display a message
2284 			 * Dup is when sd_iocid == ioc_id, and
2285 			 * sd_iocblk == <valid ptr> or -1 (the former
2286 			 * is when an ioctl has been put on the stream
2287 			 * head, but has not yet been consumed, the
2288 			 * later is when it has been consumed).
2289 			 */
2290 			if ((stp->sd_iocid == iocbp->ioc_id) &&
2291 			    (stp->sd_iocblk != NULL)) {
2292 				log_dupioc(q, bp);
2293 			}
2294 			freemsg(bp);
2295 			mutex_exit(&stp->sd_lock);
2296 			return (0);
2297 		}
2298 
2299 		/*
2300 		 * Assign ACK or NAK to user and wake up.
2301 		 */
2302 		stp->sd_iocblk = bp;
2303 		cv_broadcast(&stp->sd_monitor);
2304 		mutex_exit(&stp->sd_lock);
2305 		return (0);
2306 
2307 	case M_COPYIN:
2308 	case M_COPYOUT:
2309 		reqp = (struct copyreq *)bp->b_rptr;
2310 
2311 		/*
2312 		 * If not waiting for ACK or NAK then just fail request.
2313 		 * If already have ACK, NAK, or copy request, then just
2314 		 * fail request.
2315 		 * If incorrect id sequence number then just fail request.
2316 		 */
2317 		mutex_enter(&stp->sd_lock);
2318 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2319 		    (stp->sd_iocid != reqp->cq_id)) {
2320 			if (bp->b_cont) {
2321 				freemsg(bp->b_cont);
2322 				bp->b_cont = NULL;
2323 			}
2324 			bp->b_datap->db_type = M_IOCDATA;
2325 			bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2326 			resp = (struct copyresp *)bp->b_rptr;
2327 			resp->cp_rval = (caddr_t)1;	/* failure */
2328 			mutex_exit(&stp->sd_lock);
2329 			putnext(stp->sd_wrq, bp);
2330 			return (0);
2331 		}
2332 
2333 		/*
2334 		 * Assign copy request to user and wake up.
2335 		 */
2336 		stp->sd_iocblk = bp;
2337 		cv_broadcast(&stp->sd_monitor);
2338 		mutex_exit(&stp->sd_lock);
2339 		return (0);
2340 
2341 	case M_SETOPTS:
2342 		/*
2343 		 * Set stream head options (read option, write offset,
2344 		 * min/max packet size, and/or high/low water marks for
2345 		 * the read side only).
2346 		 */
2347 
2348 		bpri = 0;
2349 		sop = (struct stroptions *)bp->b_rptr;
2350 		mutex_enter(&stp->sd_lock);
2351 		if (sop->so_flags & SO_READOPT) {
2352 			switch (sop->so_readopt & RMODEMASK) {
2353 			case RNORM:
2354 				stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2355 				break;
2356 
2357 			case RMSGD:
2358 				stp->sd_read_opt =
2359 				    ((stp->sd_read_opt & ~RD_MSGNODIS) |
2360 				    RD_MSGDIS);
2361 				break;
2362 
2363 			case RMSGN:
2364 				stp->sd_read_opt =
2365 				    ((stp->sd_read_opt & ~RD_MSGDIS) |
2366 				    RD_MSGNODIS);
2367 				break;
2368 			}
2369 			switch (sop->so_readopt & RPROTMASK) {
2370 			case RPROTNORM:
2371 				stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2372 				break;
2373 
2374 			case RPROTDAT:
2375 				stp->sd_read_opt =
2376 				    ((stp->sd_read_opt & ~RD_PROTDIS) |
2377 				    RD_PROTDAT);
2378 				break;
2379 
2380 			case RPROTDIS:
2381 				stp->sd_read_opt =
2382 				    ((stp->sd_read_opt & ~RD_PROTDAT) |
2383 				    RD_PROTDIS);
2384 				break;
2385 			}
2386 			switch (sop->so_readopt & RFLUSHMASK) {
2387 			case RFLUSHPCPROT:
2388 				/*
2389 				 * This sets the stream head to NOT flush
2390 				 * M_PCPROTO messages.
2391 				 */
2392 				stp->sd_read_opt |= RFLUSHPCPROT;
2393 				break;
2394 			}
2395 		}
2396 		if (sop->so_flags & SO_ERROPT) {
2397 			switch (sop->so_erropt & RERRMASK) {
2398 			case RERRNORM:
2399 				stp->sd_flag &= ~STRDERRNONPERSIST;
2400 				break;
2401 			case RERRNONPERSIST:
2402 				stp->sd_flag |= STRDERRNONPERSIST;
2403 				break;
2404 			}
2405 			switch (sop->so_erropt & WERRMASK) {
2406 			case WERRNORM:
2407 				stp->sd_flag &= ~STWRERRNONPERSIST;
2408 				break;
2409 			case WERRNONPERSIST:
2410 				stp->sd_flag |= STWRERRNONPERSIST;
2411 				break;
2412 			}
2413 		}
2414 		if (sop->so_flags & SO_COPYOPT) {
2415 			if (sop->so_copyopt & ZCVMSAFE) {
2416 				stp->sd_copyflag |= STZCVMSAFE;
2417 				stp->sd_copyflag &= ~STZCVMUNSAFE;
2418 			} else if (sop->so_copyopt & ZCVMUNSAFE) {
2419 				stp->sd_copyflag |= STZCVMUNSAFE;
2420 				stp->sd_copyflag &= ~STZCVMSAFE;
2421 			}
2422 
2423 			if (sop->so_copyopt & COPYCACHED) {
2424 				stp->sd_copyflag |= STRCOPYCACHED;
2425 			}
2426 		}
2427 		if (sop->so_flags & SO_WROFF)
2428 			stp->sd_wroff = sop->so_wroff;
2429 		if (sop->so_flags & SO_TAIL)
2430 			stp->sd_tail = sop->so_tail;
2431 		if (sop->so_flags & SO_MINPSZ)
2432 			q->q_minpsz = sop->so_minpsz;
2433 		if (sop->so_flags & SO_MAXPSZ)
2434 			q->q_maxpsz = sop->so_maxpsz;
2435 		if (sop->so_flags & SO_MAXBLK)
2436 			stp->sd_maxblk = sop->so_maxblk;
2437 		if (sop->so_flags & SO_HIWAT) {
2438 			if (sop->so_flags & SO_BAND) {
2439 				if (strqset(q, QHIWAT,
2440 				    sop->so_band, sop->so_hiwat)) {
2441 					cmn_err(CE_WARN, "strrput: could not "
2442 					    "allocate qband\n");
2443 				} else {
2444 					bpri = sop->so_band;
2445 				}
2446 			} else {
2447 				q->q_hiwat = sop->so_hiwat;
2448 			}
2449 		}
2450 		if (sop->so_flags & SO_LOWAT) {
2451 			if (sop->so_flags & SO_BAND) {
2452 				if (strqset(q, QLOWAT,
2453 				    sop->so_band, sop->so_lowat)) {
2454 					cmn_err(CE_WARN, "strrput: could not "
2455 					    "allocate qband\n");
2456 				} else {
2457 					bpri = sop->so_band;
2458 				}
2459 			} else {
2460 				q->q_lowat = sop->so_lowat;
2461 			}
2462 		}
2463 		if (sop->so_flags & SO_MREADON)
2464 			stp->sd_flag |= SNDMREAD;
2465 		if (sop->so_flags & SO_MREADOFF)
2466 			stp->sd_flag &= ~SNDMREAD;
2467 		if (sop->so_flags & SO_NDELON)
2468 			stp->sd_flag |= OLDNDELAY;
2469 		if (sop->so_flags & SO_NDELOFF)
2470 			stp->sd_flag &= ~OLDNDELAY;
2471 		if (sop->so_flags & SO_ISTTY)
2472 			stp->sd_flag |= STRISTTY;
2473 		if (sop->so_flags & SO_ISNTTY)
2474 			stp->sd_flag &= ~STRISTTY;
2475 		if (sop->so_flags & SO_TOSTOP)
2476 			stp->sd_flag |= STRTOSTOP;
2477 		if (sop->so_flags & SO_TONSTOP)
2478 			stp->sd_flag &= ~STRTOSTOP;
2479 		if (sop->so_flags & SO_DELIM)
2480 			stp->sd_flag |= STRDELIM;
2481 		if (sop->so_flags & SO_NODELIM)
2482 			stp->sd_flag &= ~STRDELIM;
2483 
2484 		mutex_exit(&stp->sd_lock);
2485 		freemsg(bp);
2486 
2487 		/* Check backenable in case the water marks changed */
2488 		qbackenable(q, bpri);
2489 		return (0);
2490 
2491 	/*
2492 	 * The following set of cases deal with situations where two stream
2493 	 * heads are connected to each other (twisted streams).  These messages
2494 	 * have no meaning at the stream head.
2495 	 */
2496 	case M_BREAK:
2497 	case M_CTL:
2498 	case M_DELAY:
2499 	case M_START:
2500 	case M_STOP:
2501 	case M_IOCDATA:
2502 	case M_STARTI:
2503 	case M_STOPI:
2504 		freemsg(bp);
2505 		return (0);
2506 
2507 	case M_IOCTL:
2508 		/*
2509 		 * Always NAK this condition
2510 		 * (makes no sense)
2511 		 * If there is one or more threads in the read side
2512 		 * rwnext we have to defer the nacking until that thread
2513 		 * returns (in strget).
2514 		 */
2515 		mutex_enter(&stp->sd_lock);
2516 		if (stp->sd_struiodnak != 0) {
2517 			/*
2518 			 * Defer NAK to the streamhead. Queue at the end
2519 			 * the list.
2520 			 */
2521 			mblk_t *mp = stp->sd_struionak;
2522 
2523 			while (mp && mp->b_next)
2524 				mp = mp->b_next;
2525 			if (mp)
2526 				mp->b_next = bp;
2527 			else
2528 				stp->sd_struionak = bp;
2529 			bp->b_next = NULL;
2530 			mutex_exit(&stp->sd_lock);
2531 			return (0);
2532 		}
2533 		mutex_exit(&stp->sd_lock);
2534 
2535 		bp->b_datap->db_type = M_IOCNAK;
2536 		/*
2537 		 * Protect against the driver passing up
2538 		 * messages after it has done a qprocsoff.
2539 		 */
2540 		if (_OTHERQ(q)->q_next == NULL)
2541 			freemsg(bp);
2542 		else
2543 			qreply(q, bp);
2544 		return (0);
2545 
2546 	default:
2547 #ifdef DEBUG
2548 		cmn_err(CE_WARN,
2549 		    "bad message type %x received at stream head\n",
2550 		    bp->b_datap->db_type);
2551 #endif
2552 		freemsg(bp);
2553 		return (0);
2554 	}
2555 
2556 	/* NOTREACHED */
2557 }
2558 
2559 /*
2560  * Check if the stream pointed to by `stp' can be written to, and return an
2561  * error code if not.  If `eiohup' is set, then return EIO if STRHUP is set.
2562  * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2563  * then always return EPIPE and send a SIGPIPE to the invoking thread.
2564  */
2565 static int
strwriteable(struct stdata * stp,boolean_t eiohup,boolean_t sigpipeok)2566 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2567 {
2568 	int error;
2569 
2570 	ASSERT(MUTEX_HELD(&stp->sd_lock));
2571 
2572 	/*
2573 	 * For modem support, POSIX states that on writes, EIO should
2574 	 * be returned if the stream has been hung up.
2575 	 */
2576 	if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2577 		error = EIO;
2578 	else
2579 		error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2580 
2581 	if (error != 0) {
2582 		if (!(stp->sd_flag & STPLEX) &&
2583 		    (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2584 			tsignal(curthread, SIGPIPE);
2585 			error = EPIPE;
2586 		}
2587 	}
2588 
2589 	return (error);
2590 }
2591 
2592 /*
2593  * Copyin and send data down a stream.
2594  * The caller will allocate and copyin any control part that precedes the
2595  * message and pass that in as mctl.
2596  *
2597  * Caller should *not* hold sd_lock.
2598  * When EWOULDBLOCK is returned the caller has to redo the canputnext
2599  * under sd_lock in order to avoid missing a backenabling wakeup.
2600  *
2601  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2602  *
2603  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2604  * For sync streams we can only ignore flow control by reverting to using
2605  * putnext.
2606  *
2607  * If sd_maxblk is less than *iosize this routine might return without
2608  * transferring all of *iosize. In all cases, on return *iosize will contain
2609  * the amount of data that was transferred.
2610  */
2611 static int
strput(struct stdata * stp,mblk_t * mctl,struct uio * uiop,ssize_t * iosize,int b_flag,int pri,int flags)2612 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2613     int b_flag, int pri, int flags)
2614 {
2615 	struiod_t uiod;
2616 	struct iovec buf[IOV_MAX_STACK];
2617 	int iovlen = 0;
2618 	mblk_t *mp;
2619 	queue_t *wqp = stp->sd_wrq;
2620 	int error = 0;
2621 	ssize_t count = *iosize;
2622 
2623 	ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2624 
2625 	if (uiop != NULL && count >= 0)
2626 		flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2627 
2628 	if (!(flags & STRUIO_POSTPONE)) {
2629 		/*
2630 		 * Use regular canputnext, strmakedata, putnext sequence.
2631 		 */
2632 		if (pri == 0) {
2633 			if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2634 				freemsg(mctl);
2635 				return (EWOULDBLOCK);
2636 			}
2637 		} else {
2638 			if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2639 				freemsg(mctl);
2640 				return (EWOULDBLOCK);
2641 			}
2642 		}
2643 
2644 		if ((error = strmakedata(iosize, uiop, stp, flags,
2645 		    &mp)) != 0) {
2646 			freemsg(mctl);
2647 			/*
2648 			 * need to change return code to ENOMEM
2649 			 * so that this is not confused with
2650 			 * flow control, EAGAIN.
2651 			 */
2652 
2653 			if (error == EAGAIN)
2654 				return (ENOMEM);
2655 			else
2656 				return (error);
2657 		}
2658 		if (mctl != NULL) {
2659 			if (mctl->b_cont == NULL)
2660 				mctl->b_cont = mp;
2661 			else if (mp != NULL)
2662 				linkb(mctl, mp);
2663 			mp = mctl;
2664 		} else if (mp == NULL)
2665 			return (0);
2666 
2667 		mp->b_flag |= b_flag;
2668 		mp->b_band = (uchar_t)pri;
2669 
2670 		if (flags & MSG_IGNFLOW) {
2671 			/*
2672 			 * XXX Hack: Don't get stuck running service
2673 			 * procedures. This is needed for sockfs when
2674 			 * sending the unbind message out of the rput
2675 			 * procedure - we don't want a put procedure
2676 			 * to run service procedures.
2677 			 */
2678 			putnext(wqp, mp);
2679 		} else {
2680 			stream_willservice(stp);
2681 			putnext(wqp, mp);
2682 			stream_runservice(stp);
2683 		}
2684 		return (0);
2685 	}
2686 	/*
2687 	 * Stream supports rwnext() for the write side.
2688 	 */
2689 	if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2690 		freemsg(mctl);
2691 		/*
2692 		 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2693 		 */
2694 		return (error == EAGAIN ? ENOMEM : error);
2695 	}
2696 	if (mctl != NULL) {
2697 		if (mctl->b_cont == NULL)
2698 			mctl->b_cont = mp;
2699 		else if (mp != NULL)
2700 			linkb(mctl, mp);
2701 		mp = mctl;
2702 	} else if (mp == NULL) {
2703 		return (0);
2704 	}
2705 
2706 	mp->b_flag |= b_flag;
2707 	mp->b_band = (uchar_t)pri;
2708 
2709 	if (uiop->uio_iovcnt > IOV_MAX_STACK) {
2710 		iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
2711 		uiod.d_iov = (struct iovec *)kmem_alloc(iovlen, KM_SLEEP);
2712 	} else {
2713 		uiod.d_iov = buf;
2714 	}
2715 
2716 	(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
2717 	uiod.d_uio.uio_offset = 0;
2718 	uiod.d_mp = mp;
2719 	error = rwnext(wqp, &uiod);
2720 	if (! uiod.d_mp) {
2721 		uioskip(uiop, *iosize);
2722 		if (iovlen != 0)
2723 			kmem_free(uiod.d_iov, iovlen);
2724 		return (error);
2725 	}
2726 	ASSERT(mp == uiod.d_mp);
2727 	if (error == EINVAL) {
2728 		/*
2729 		 * The stream plumbing must have changed while
2730 		 * we were away, so just turn off rwnext()s.
2731 		 */
2732 		error = 0;
2733 	} else if (error == EBUSY || error == EWOULDBLOCK) {
2734 		/*
2735 		 * Couldn't enter a perimeter or took a page fault,
2736 		 * so fall-back to putnext().
2737 		 */
2738 		error = 0;
2739 	} else {
2740 		freemsg(mp);
2741 		if (iovlen != 0)
2742 			kmem_free(uiod.d_iov, iovlen);
2743 		return (error);
2744 	}
2745 	/* Have to check canput before consuming data from the uio */
2746 	if (pri == 0) {
2747 		if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2748 			freemsg(mp);
2749 			if (iovlen != 0)
2750 				kmem_free(uiod.d_iov, iovlen);
2751 			return (EWOULDBLOCK);
2752 		}
2753 	} else {
2754 		if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2755 			freemsg(mp);
2756 			if (iovlen != 0)
2757 				kmem_free(uiod.d_iov, iovlen);
2758 			return (EWOULDBLOCK);
2759 		}
2760 	}
2761 	ASSERT(mp == uiod.d_mp);
2762 	/* Copyin data from the uio */
2763 	if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2764 		freemsg(mp);
2765 		if (iovlen != 0)
2766 			kmem_free(uiod.d_iov, iovlen);
2767 		return (error);
2768 	}
2769 	uioskip(uiop, *iosize);
2770 	if (flags & MSG_IGNFLOW) {
2771 		/*
2772 		 * XXX Hack: Don't get stuck running service procedures.
2773 		 * This is needed for sockfs when sending the unbind message
2774 		 * out of the rput procedure - we don't want a put procedure
2775 		 * to run service procedures.
2776 		 */
2777 		putnext(wqp, mp);
2778 	} else {
2779 		stream_willservice(stp);
2780 		putnext(wqp, mp);
2781 		stream_runservice(stp);
2782 	}
2783 	if (iovlen != 0)
2784 		kmem_free(uiod.d_iov, iovlen);
2785 	return (0);
2786 }
2787 
2788 /*
2789  * Write attempts to break the write request into messages conforming
2790  * with the minimum and maximum packet sizes set downstream.
2791  *
2792  * Write will not block if downstream queue is full and
2793  * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2794  *
2795  * A write of zero bytes gets packaged into a zero length message and sent
2796  * downstream like any other message.
2797  *
2798  * If buffers of the requested sizes are not available, the write will
2799  * sleep until the buffers become available.
2800  *
2801  * Write (if specified) will supply a write offset in a message if it
2802  * makes sense. This can be specified by downstream modules as part of
2803  * a M_SETOPTS message.  Write will not supply the write offset if it
2804  * cannot supply any data in a buffer.  In other words, write will never
2805  * send down an empty packet due to a write offset.
2806  */
2807 /* ARGSUSED2 */
2808 int
strwrite(struct vnode * vp,struct uio * uiop,cred_t * crp)2809 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2810 {
2811 	return (strwrite_common(vp, uiop, crp, 0));
2812 }
2813 
2814 /* ARGSUSED2 */
2815 int
strwrite_common(struct vnode * vp,struct uio * uiop,cred_t * crp,int wflag)2816 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
2817 {
2818 	struct stdata *stp;
2819 	struct queue *wqp;
2820 	ssize_t rmin, rmax;
2821 	ssize_t iosize;
2822 	int waitflag;
2823 	int tempmode;
2824 	int error = 0;
2825 	int b_flag;
2826 
2827 	ASSERT(vp->v_stream);
2828 	stp = vp->v_stream;
2829 
2830 	mutex_enter(&stp->sd_lock);
2831 
2832 	if ((error = i_straccess(stp, JCWRITE)) != 0) {
2833 		mutex_exit(&stp->sd_lock);
2834 		return (error);
2835 	}
2836 
2837 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2838 		error = strwriteable(stp, B_TRUE, B_TRUE);
2839 		if (error != 0) {
2840 			mutex_exit(&stp->sd_lock);
2841 			return (error);
2842 		}
2843 	}
2844 
2845 	mutex_exit(&stp->sd_lock);
2846 
2847 	wqp = stp->sd_wrq;
2848 
2849 	/* get these values from them cached in the stream head */
2850 	rmin = stp->sd_qn_minpsz;
2851 	rmax = stp->sd_qn_maxpsz;
2852 
2853 	/*
2854 	 * Check the min/max packet size constraints.  If min packet size
2855 	 * is non-zero, the write cannot be split into multiple messages
2856 	 * and still guarantee the size constraints.
2857 	 */
2858 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2859 
2860 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
2861 	if (rmax == 0) {
2862 		return (0);
2863 	}
2864 	if (rmin > 0) {
2865 		if (uiop->uio_resid < rmin) {
2866 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2867 			    "strwrite out:q %p out %d error %d",
2868 			    wqp, 0, ERANGE);
2869 			return (ERANGE);
2870 		}
2871 		if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2872 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2873 			    "strwrite out:q %p out %d error %d",
2874 			    wqp, 1, ERANGE);
2875 			return (ERANGE);
2876 		}
2877 	}
2878 
2879 	/*
2880 	 * Do until count satisfied or error.
2881 	 */
2882 	waitflag = WRITEWAIT | wflag;
2883 	if (stp->sd_flag & OLDNDELAY)
2884 		tempmode = uiop->uio_fmode & ~FNDELAY;
2885 	else
2886 		tempmode = uiop->uio_fmode;
2887 
2888 	if (rmax == INFPSZ)
2889 		rmax = uiop->uio_resid;
2890 
2891 	/*
2892 	 * Note that tempmode does not get used in strput/strmakedata
2893 	 * but only in strwaitq. The other routines use uio_fmode
2894 	 * unmodified.
2895 	 */
2896 
2897 	/* LINTED: constant in conditional context */
2898 	while (1) {	/* breaks when uio_resid reaches zero */
2899 		/*
2900 		 * Determine the size of the next message to be
2901 		 * packaged.  May have to break write into several
2902 		 * messages based on max packet size.
2903 		 */
2904 		iosize = MIN(uiop->uio_resid, rmax);
2905 
2906 		/*
2907 		 * Put block downstream when flow control allows it.
2908 		 */
2909 		if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2910 			b_flag = MSGDELIM;
2911 		else
2912 			b_flag = 0;
2913 
2914 		for (;;) {
2915 			int done = 0;
2916 
2917 			error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0);
2918 			if (error == 0)
2919 				break;
2920 			if (error != EWOULDBLOCK)
2921 				goto out;
2922 
2923 			mutex_enter(&stp->sd_lock);
2924 			/*
2925 			 * Check for a missed wakeup.
2926 			 * Needed since strput did not hold sd_lock across
2927 			 * the canputnext.
2928 			 */
2929 			if (canputnext(wqp)) {
2930 				/* Try again */
2931 				mutex_exit(&stp->sd_lock);
2932 				continue;
2933 			}
2934 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2935 			    "strwrite wait:q %p wait", wqp);
2936 			if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2937 			    tempmode, -1, &done)) != 0 || done) {
2938 				mutex_exit(&stp->sd_lock);
2939 				if ((vp->v_type == VFIFO) &&
2940 				    (uiop->uio_fmode & FNDELAY) &&
2941 				    (error == EAGAIN))
2942 					error = 0;
2943 				goto out;
2944 			}
2945 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2946 			    "strwrite wake:q %p awakes", wqp);
2947 			if ((error = i_straccess(stp, JCWRITE)) != 0) {
2948 				mutex_exit(&stp->sd_lock);
2949 				goto out;
2950 			}
2951 			mutex_exit(&stp->sd_lock);
2952 		}
2953 		waitflag |= NOINTR;
2954 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2955 		    "strwrite resid:q %p uiop %p", wqp, uiop);
2956 		if (uiop->uio_resid) {
2957 			/* Recheck for errors - needed for sockets */
2958 			if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2959 			    (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2960 				mutex_enter(&stp->sd_lock);
2961 				error = strwriteable(stp, B_FALSE, B_TRUE);
2962 				mutex_exit(&stp->sd_lock);
2963 				if (error != 0)
2964 					return (error);
2965 			}
2966 			continue;
2967 		}
2968 		break;
2969 	}
2970 out:
2971 	/*
2972 	 * For historical reasons, applications expect EAGAIN when a data
2973 	 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2974 	 */
2975 	if (error == ENOMEM)
2976 		error = EAGAIN;
2977 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2978 	    "strwrite out:q %p out %d error %d", wqp, 2, error);
2979 	return (error);
2980 }
2981 
2982 /*
2983  * Stream head write service routine.
2984  * Its job is to wake up any sleeping writers when a queue
2985  * downstream needs data (part of the flow control in putq and getq).
2986  * It also must wake anyone sleeping on a poll().
2987  * For stream head right below mux module, it must also invoke put procedure
2988  * of next downstream module.
2989  */
2990 int
strwsrv(queue_t * q)2991 strwsrv(queue_t *q)
2992 {
2993 	struct stdata *stp;
2994 	queue_t *tq;
2995 	qband_t *qbp;
2996 	int i;
2997 	qband_t *myqbp;
2998 	int isevent;
2999 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
3000 
3001 	TRACE_1(TR_FAC_STREAMS_FR,
3002 	    TR_STRWSRV, "strwsrv:q %p", q);
3003 	stp = (struct stdata *)q->q_ptr;
3004 	ASSERT(qclaimed(q));
3005 	mutex_enter(&stp->sd_lock);
3006 	ASSERT(!(stp->sd_flag & STPLEX));
3007 
3008 	if (stp->sd_flag & WSLEEP) {
3009 		stp->sd_flag &= ~WSLEEP;
3010 		cv_broadcast(&q->q_wait);
3011 	}
3012 	mutex_exit(&stp->sd_lock);
3013 
3014 	/* The other end of a stream pipe went away. */
3015 	if ((tq = q->q_next) == NULL) {
3016 		return (0);
3017 	}
3018 
3019 	/* Find the next module forward that has a service procedure */
3020 	claimstr(q);
3021 	tq = q->q_nfsrv;
3022 	ASSERT(tq != NULL);
3023 
3024 	if ((q->q_flag & QBACK)) {
3025 		if ((tq->q_flag & QFULL)) {
3026 			mutex_enter(QLOCK(tq));
3027 			if (!(tq->q_flag & QFULL)) {
3028 				mutex_exit(QLOCK(tq));
3029 				goto wakeup;
3030 			}
3031 			/*
3032 			 * The queue must have become full again. Set QWANTW
3033 			 * again so strwsrv will be back enabled when
3034 			 * the queue becomes non-full next time.
3035 			 */
3036 			tq->q_flag |= QWANTW;
3037 			mutex_exit(QLOCK(tq));
3038 		} else {
3039 		wakeup:
3040 			pollwakeup(&stp->sd_pollist, POLLWRNORM);
3041 			mutex_enter(&stp->sd_lock);
3042 			if (stp->sd_sigflags & S_WRNORM)
3043 				strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
3044 			mutex_exit(&stp->sd_lock);
3045 		}
3046 	}
3047 
3048 	isevent = 0;
3049 	i = 1;
3050 	bzero((caddr_t)qbf, NBAND);
3051 	mutex_enter(QLOCK(tq));
3052 	if ((myqbp = q->q_bandp) != NULL)
3053 		for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
3054 			ASSERT(myqbp);
3055 			if ((myqbp->qb_flag & QB_BACK)) {
3056 				if (qbp->qb_flag & QB_FULL) {
3057 					/*
3058 					 * The band must have become full again.
3059 					 * Set QB_WANTW again so strwsrv will
3060 					 * be back enabled when the band becomes
3061 					 * non-full next time.
3062 					 */
3063 					qbp->qb_flag |= QB_WANTW;
3064 				} else {
3065 					isevent = 1;
3066 					qbf[i] = 1;
3067 				}
3068 			}
3069 			myqbp = myqbp->qb_next;
3070 			i++;
3071 		}
3072 	mutex_exit(QLOCK(tq));
3073 
3074 	if (isevent) {
3075 		for (i = tq->q_nband; i; i--) {
3076 			if (qbf[i]) {
3077 				pollwakeup(&stp->sd_pollist, POLLWRBAND);
3078 				mutex_enter(&stp->sd_lock);
3079 				if (stp->sd_sigflags & S_WRBAND)
3080 					strsendsig(stp->sd_siglist, S_WRBAND,
3081 					    (uchar_t)i, 0);
3082 				mutex_exit(&stp->sd_lock);
3083 			}
3084 		}
3085 	}
3086 
3087 	releasestr(q);
3088 	return (0);
3089 }
3090 
3091 /*
3092  * Special case of strcopyin/strcopyout for copying
3093  * struct strioctl that can deal with both data
3094  * models.
3095  */
3096 
3097 #ifdef	_LP64
3098 
3099 static int
strcopyin_strioctl(void * from,void * to,int flag,int copyflag)3100 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3101 {
3102 	struct	strioctl32 strioc32;
3103 	struct	strioctl *striocp;
3104 
3105 	if (copyflag & U_TO_K) {
3106 		ASSERT((copyflag & K_TO_K) == 0);
3107 
3108 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
3109 			if (copyin(from, &strioc32, sizeof (strioc32)))
3110 				return (EFAULT);
3111 
3112 			striocp = (struct strioctl *)to;
3113 			striocp->ic_cmd	= strioc32.ic_cmd;
3114 			striocp->ic_timout = strioc32.ic_timout;
3115 			striocp->ic_len	= strioc32.ic_len;
3116 			striocp->ic_dp	= (char *)(uintptr_t)strioc32.ic_dp;
3117 
3118 		} else { /* NATIVE data model */
3119 			if (copyin(from, to, sizeof (struct strioctl))) {
3120 				return (EFAULT);
3121 			} else {
3122 				return (0);
3123 			}
3124 		}
3125 	} else {
3126 		ASSERT(copyflag & K_TO_K);
3127 		bcopy(from, to, sizeof (struct strioctl));
3128 	}
3129 	return (0);
3130 }
3131 
3132 static int
strcopyout_strioctl(void * from,void * to,int flag,int copyflag)3133 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3134 {
3135 	struct	strioctl32 strioc32;
3136 	struct	strioctl *striocp;
3137 
3138 	if (copyflag & U_TO_K) {
3139 		ASSERT((copyflag & K_TO_K) == 0);
3140 
3141 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
3142 			striocp = (struct strioctl *)from;
3143 			strioc32.ic_cmd	= striocp->ic_cmd;
3144 			strioc32.ic_timout = striocp->ic_timout;
3145 			strioc32.ic_len	= striocp->ic_len;
3146 			strioc32.ic_dp	= (caddr32_t)(uintptr_t)striocp->ic_dp;
3147 			ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
3148 			    striocp->ic_dp);
3149 
3150 			if (copyout(&strioc32, to, sizeof (strioc32)))
3151 				return (EFAULT);
3152 
3153 		} else { /* NATIVE data model */
3154 			if (copyout(from, to, sizeof (struct strioctl))) {
3155 				return (EFAULT);
3156 			} else {
3157 				return (0);
3158 			}
3159 		}
3160 	} else {
3161 		ASSERT(copyflag & K_TO_K);
3162 		bcopy(from, to, sizeof (struct strioctl));
3163 	}
3164 	return (0);
3165 }
3166 
3167 #else	/* ! _LP64 */
3168 
3169 /* ARGSUSED2 */
3170 static int
strcopyin_strioctl(void * from,void * to,int flag,int copyflag)3171 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3172 {
3173 	return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3174 }
3175 
3176 /* ARGSUSED2 */
3177 static int
strcopyout_strioctl(void * from,void * to,int flag,int copyflag)3178 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3179 {
3180 	return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3181 }
3182 
3183 #endif	/* _LP64 */
3184 
3185 /*
3186  * Determine type of job control semantics expected by user.  The
3187  * possibilities are:
3188  *	JCREAD	- Behaves like read() on fd; send SIGTTIN
3189  *	JCWRITE	- Behaves like write() on fd; send SIGTTOU if TOSTOP set
3190  *	JCSETP	- Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3191  *	JCGETP	- Gets a value in the stream; no signals.
3192  * See straccess in strsubr.c for usage of these values.
3193  *
3194  * This routine also returns -1 for I_STR as a special case; the
3195  * caller must call again with the real ioctl number for
3196  * classification.
3197  */
3198 static int
job_control_type(int cmd)3199 job_control_type(int cmd)
3200 {
3201 	switch (cmd) {
3202 	case I_STR:
3203 		return (-1);
3204 
3205 	case I_RECVFD:
3206 	case I_E_RECVFD:
3207 		return (JCREAD);
3208 
3209 	case I_FDINSERT:
3210 	case I_SENDFD:
3211 		return (JCWRITE);
3212 
3213 	case TCSETA:
3214 	case TCSETAW:
3215 	case TCSETAF:
3216 	case TCSBRK:
3217 	case TCXONC:
3218 	case TCFLSH:
3219 	case TCDSET:	/* Obsolete */
3220 	case TIOCSWINSZ:
3221 	case TCSETS:
3222 	case TCSETSW:
3223 	case TCSETSF:
3224 	case TIOCSETD:
3225 	case TIOCHPCL:
3226 	case TIOCSETP:
3227 	case TIOCSETN:
3228 	case TIOCEXCL:
3229 	case TIOCNXCL:
3230 	case TIOCFLUSH:
3231 	case TIOCSETC:
3232 	case TIOCLBIS:
3233 	case TIOCLBIC:
3234 	case TIOCLSET:
3235 	case TIOCSBRK:
3236 	case TIOCCBRK:
3237 	case TIOCSDTR:
3238 	case TIOCCDTR:
3239 	case TIOCSLTC:
3240 	case TIOCSTOP:
3241 	case TIOCSTART:
3242 	case TIOCSTI:
3243 	case TIOCSPGRP:
3244 	case TIOCMSET:
3245 	case TIOCMBIS:
3246 	case TIOCMBIC:
3247 	case TIOCREMOTE:
3248 	case TIOCSIGNAL:
3249 	case LDSETT:
3250 	case LDSMAP:	/* Obsolete */
3251 	case DIOCSETP:
3252 	case I_FLUSH:
3253 	case I_SRDOPT:
3254 	case I_SETSIG:
3255 	case I_SWROPT:
3256 	case I_FLUSHBAND:
3257 	case I_SETCLTIME:
3258 	case I_SERROPT:
3259 	case I_ESETSIG:
3260 	case FIONBIO:
3261 	case FIOASYNC:
3262 	case FIOSETOWN:
3263 	case JBOOT:	/* Obsolete */
3264 	case JTERM:	/* Obsolete */
3265 	case JTIMOM:	/* Obsolete */
3266 	case JZOMBOOT:	/* Obsolete */
3267 	case JAGENT:	/* Obsolete */
3268 	case JTRUN:	/* Obsolete */
3269 	case JXTPROTO:	/* Obsolete */
3270 		return (JCSETP);
3271 	}
3272 
3273 	return (JCGETP);
3274 }
3275 
3276 /*
3277  * ioctl for streams
3278  */
3279 int
strioctl(struct vnode * vp,int cmd,intptr_t arg,int flag,int copyflag,cred_t * crp,int * rvalp)3280 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3281     cred_t *crp, int *rvalp)
3282 {
3283 	struct stdata *stp;
3284 	struct strcmd *scp;
3285 	struct strioctl strioc;
3286 	struct uio uio;
3287 	struct iovec iov;
3288 	int access;
3289 	mblk_t *mp;
3290 	int error = 0;
3291 	int done = 0;
3292 	ssize_t	rmin, rmax;
3293 	queue_t *wrq;
3294 	queue_t *rdq;
3295 	boolean_t kioctl = B_FALSE;
3296 	uint32_t auditing = AU_AUDITING();
3297 
3298 	if (flag & FKIOCTL) {
3299 		copyflag = K_TO_K;
3300 		kioctl = B_TRUE;
3301 	}
3302 	ASSERT(vp->v_stream);
3303 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3304 	stp = vp->v_stream;
3305 
3306 	TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3307 	    "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3308 
3309 	/*
3310 	 * If the copy is kernel to kernel, make sure that the FNATIVE
3311 	 * flag is set.  After this it would be a serious error to have
3312 	 * no model flag.
3313 	 */
3314 	if (copyflag == K_TO_K)
3315 		flag = (flag & ~FMODELS) | FNATIVE;
3316 
3317 	ASSERT((flag & FMODELS) != 0);
3318 
3319 	wrq = stp->sd_wrq;
3320 	rdq = _RD(wrq);
3321 
3322 	access = job_control_type(cmd);
3323 
3324 	/* We should never see these here, should be handled by iwscn */
3325 	if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3326 		return (EINVAL);
3327 
3328 	mutex_enter(&stp->sd_lock);
3329 	if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
3330 		mutex_exit(&stp->sd_lock);
3331 		return (error);
3332 	}
3333 	mutex_exit(&stp->sd_lock);
3334 
3335 	/*
3336 	 * Check for sgttyb-related ioctls first, and complain as
3337 	 * necessary.
3338 	 */
3339 	switch (cmd) {
3340 	case TIOCGETP:
3341 	case TIOCSETP:
3342 	case TIOCSETN:
3343 		if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3344 			sgttyb_complaint = B_TRUE;
3345 			cmn_err(CE_NOTE,
3346 			    "application used obsolete TIOC[GS]ET");
3347 		}
3348 		if (sgttyb_handling >= 3) {
3349 			tsignal(curthread, SIGSYS);
3350 			return (EIO);
3351 		}
3352 		break;
3353 	}
3354 
3355 	mutex_enter(&stp->sd_lock);
3356 
3357 	switch (cmd) {
3358 	case I_RECVFD:
3359 	case I_E_RECVFD:
3360 	case I_PEEK:
3361 	case I_NREAD:
3362 	case FIONREAD:
3363 	case FIORDCHK:
3364 	case I_ATMARK:
3365 	case FIONBIO:
3366 	case FIOASYNC:
3367 		if (stp->sd_flag & (STRDERR|STPLEX)) {
3368 			error = strgeterr(stp, STRDERR|STPLEX, 0);
3369 			if (error != 0) {
3370 				mutex_exit(&stp->sd_lock);
3371 				return (error);
3372 			}
3373 		}
3374 		break;
3375 
3376 	default:
3377 		if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3378 			error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3379 			if (error != 0) {
3380 				mutex_exit(&stp->sd_lock);
3381 				return (error);
3382 			}
3383 		}
3384 	}
3385 
3386 	mutex_exit(&stp->sd_lock);
3387 
3388 	switch (cmd) {
3389 	default:
3390 		/*
3391 		 * The stream head has hardcoded knowledge of a
3392 		 * miscellaneous collection of terminal-, keyboard- and
3393 		 * mouse-related ioctls, enumerated below.  This hardcoded
3394 		 * knowledge allows the stream head to automatically
3395 		 * convert transparent ioctl requests made by userland
3396 		 * programs into I_STR ioctls which many old STREAMS
3397 		 * modules and drivers require.
3398 		 *
3399 		 * No new ioctls should ever be added to this list.
3400 		 * Instead, the STREAMS module or driver should be written
3401 		 * to either handle transparent ioctls or require any
3402 		 * userland programs to use I_STR ioctls (by returning
3403 		 * EINVAL to any transparent ioctl requests).
3404 		 *
3405 		 * More importantly, removing ioctls from this list should
3406 		 * be done with the utmost care, since our STREAMS modules
3407 		 * and drivers *count* on the stream head performing this
3408 		 * conversion, and thus may panic while processing
3409 		 * transparent ioctl request for one of these ioctls (keep
3410 		 * in mind that third party modules and drivers may have
3411 		 * similar problems).
3412 		 */
3413 		if (((cmd & IOCTYPE) == LDIOC) ||
3414 		    ((cmd & IOCTYPE) == tIOC) ||
3415 		    ((cmd & IOCTYPE) == TIOC) ||
3416 		    ((cmd & IOCTYPE) == KIOC) ||
3417 		    ((cmd & IOCTYPE) == MSIOC) ||
3418 		    ((cmd & IOCTYPE) == VUIOC)) {
3419 			/*
3420 			 * The ioctl is a tty ioctl - set up strioc buffer
3421 			 * and call strdoioctl() to do the work.
3422 			 */
3423 			if (stp->sd_flag & STRHUP)
3424 				return (ENXIO);
3425 			strioc.ic_cmd = cmd;
3426 			strioc.ic_timout = INFTIM;
3427 
3428 			switch (cmd) {
3429 
3430 			case TCXONC:
3431 			case TCSBRK:
3432 			case TCFLSH:
3433 			case TCDSET:
3434 				{
3435 				int native_arg = (int)arg;
3436 				strioc.ic_len = sizeof (int);
3437 				strioc.ic_dp = (char *)&native_arg;
3438 				return (strdoioctl(stp, &strioc, flag,
3439 				    K_TO_K, crp, rvalp));
3440 				}
3441 
3442 			case TCSETA:
3443 			case TCSETAW:
3444 			case TCSETAF:
3445 				strioc.ic_len = sizeof (struct termio);
3446 				strioc.ic_dp = (char *)arg;
3447 				return (strdoioctl(stp, &strioc, flag,
3448 				    copyflag, crp, rvalp));
3449 
3450 			case TCSETS:
3451 			case TCSETSW:
3452 			case TCSETSF:
3453 				strioc.ic_len = sizeof (struct termios);
3454 				strioc.ic_dp = (char *)arg;
3455 				return (strdoioctl(stp, &strioc, flag,
3456 				    copyflag, crp, rvalp));
3457 
3458 			case LDSETT:
3459 				strioc.ic_len = sizeof (struct termcb);
3460 				strioc.ic_dp = (char *)arg;
3461 				return (strdoioctl(stp, &strioc, flag,
3462 				    copyflag, crp, rvalp));
3463 
3464 			case TIOCSETP:
3465 				strioc.ic_len = sizeof (struct sgttyb);
3466 				strioc.ic_dp = (char *)arg;
3467 				return (strdoioctl(