1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved	*/
23 
24 /*
25  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  *
28  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
29  */
30 
#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/errno.h>
#include <sys/vtrace.h>
#include <sys/ftrace.h>
#include <sys/ontrap.h>
#include <sys/sdt.h>
#include <sys/strft.h>
50 
51 /*
52  * This file contains selected functions from io/stream.c
53  * needed by this library, mostly unmodified.
54  */
55 
56 /*
57  * STREAMS message allocator: principles of operation
58  * (See usr/src/uts/common/io/stream.c)
59  */
/*
 * Geometry of the size-classed dblk caches: the largest cached buffer
 * size, the alignment requested for every dblk cache, and the granule
 * (and its log2) used to index dblk_cache[] by request size.
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

/*
 * DBLK_RTFU_SHIFT(field) is the bit position of a one-byte dblk field
 * inside the 32-bit word that DBLK_RTFU_WORD() reads at &db_ref.
 * NOTE(review): this assumes db_ref, db_type, db_flags and db_struioflag
 * are four consecutive bytes in dblk_t -- confirm against sys/stream.h.
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Compose the combined ref/type/flags/uioflag word.  In addition to the
 * caller's flags, (ref - 1) is OR-ed into the flags byte; dblk_decref()
 * later reads the DBLK_REFMIN bits of that byte as the minimum reference
 * count.  Every macro argument is parenthesized so that expressions such
 * as "a | b" may be passed safely.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | ((ref) - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
/* Mask covering the db_ref byte within the combined word. */
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
/* Access the four-byte ref/type/flags/uioflag group as one uint32_t. */
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
/* Access the 32-bit word that starts at an mblk's b_band field. */
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
81 
/*
 * Zero-terminated table of dblk buffer sizes.  streams_msg_init() walks
 * it and creates one "streams_dblk_<size>" kmem cache per entry; the
 * list differs between _LP64 and 32-bit builds and always ends with
 * DBLK_MAX_CACHE followed by the 0 terminator.
 */
static size_t dblk_sizes[] = {
#ifdef _LP64
	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856,
	8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624,
	40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392,
#else
	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904,
	8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672,
	40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440,
#endif
	DBLK_MAX_CACHE, 0
};
94 
/*
 * dblk_cache[] maps a request size to its kmem cache; it is indexed by
 * (size - 1) >> DBLK_SIZE_SHIFT.  mblk_cache supplies bare mblk_t's and
 * dblk_esb_cache supplies dblks whose data buffer is provided separately
 * (see gesballoc()).
 */
static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
static struct kmem_cache *mblk_cache;
static struct kmem_cache *dblk_esb_cache;

/* Forward declarations for routines defined later in this file. */
static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
static mblk_t *allocb_oversize(size_t size, int flags);
/* Incremented each time allocb_tryhard() gives up. */
static int allocb_tryhard_fails;
static void frnop_func(void *arg);
/* Free-routine descriptor whose callback does nothing. */
frtn_t frnop = { frnop_func };
static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);

/*
 * Patchable mblk/dblk kmem_cache flags; passed as the final argument
 * to kmem_cache_create() in streams_msg_init().
 */
int dblk_kmem_flags = 0;
int mblk_kmem_flags = 0;
111 
112 static int
113 dblk_constructor(void *buf, void *cdrarg, int kmflags)
114 {
115 	dblk_t *dbp = buf;
116 	ssize_t msg_size = (ssize_t)cdrarg;
117 	size_t index;
118 
119 	ASSERT(msg_size != 0);
120 
121 	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;
122 
123 	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));
124 
125 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
126 		return (-1);
127 	if ((msg_size & PAGEOFFSET) == 0) {
128 		dbp->db_base = kmem_alloc(msg_size, kmflags);
129 		if (dbp->db_base == NULL) {
130 			kmem_cache_free(mblk_cache, dbp->db_mblk);
131 			return (-1);
132 		}
133 	} else {
134 		dbp->db_base = (unsigned char *)&dbp[1];
135 	}
136 
137 	dbp->db_mblk->b_datap = dbp;
138 	dbp->db_cache = dblk_cache[index];
139 	dbp->db_lim = dbp->db_base + msg_size;
140 	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
141 	dbp->db_frtnp = NULL;
142 	dbp->db_fthdr = NULL;
143 	dbp->db_credp = NULL;
144 	dbp->db_cpid = -1;
145 	dbp->db_struioflag = 0;
146 	dbp->db_struioun.cksum.flags = 0;
147 	return (0);
148 }
149 
150 /*ARGSUSED*/
151 static int
152 dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
153 {
154 	dblk_t *dbp = buf;
155 
156 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
157 		return (-1);
158 	dbp->db_mblk->b_datap = dbp;
159 	dbp->db_cache = dblk_esb_cache;
160 	dbp->db_fthdr = NULL;
161 	dbp->db_credp = NULL;
162 	dbp->db_cpid = -1;
163 	dbp->db_struioflag = 0;
164 	dbp->db_struioun.cksum.flags = 0;
165 	return (0);
166 }
167 
168 static int
169 bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
170 {
171 	dblk_t *dbp = buf;
172 	bcache_t *bcp = cdrarg;
173 
174 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
175 		return (-1);
176 
177 	dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags);
178 	if (dbp->db_base == NULL) {
179 		kmem_cache_free(mblk_cache, dbp->db_mblk);
180 		return (-1);
181 	}
182 
183 	dbp->db_mblk->b_datap = dbp;
184 	dbp->db_cache = (void *)bcp;
185 	dbp->db_lim = dbp->db_base + bcp->size;
186 	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
187 	dbp->db_frtnp = NULL;
188 	dbp->db_fthdr = NULL;
189 	dbp->db_credp = NULL;
190 	dbp->db_cpid = -1;
191 	dbp->db_struioflag = 0;
192 	dbp->db_struioun.cksum.flags = 0;
193 	return (0);
194 }
195 
196 /*ARGSUSED*/
197 static void
198 dblk_destructor(void *buf, void *cdrarg)
199 {
200 	dblk_t *dbp = buf;
201 	ssize_t msg_size = (ssize_t)cdrarg;
202 
203 	ASSERT(dbp->db_mblk->b_datap == dbp);
204 	ASSERT(msg_size != 0);
205 	ASSERT(dbp->db_struioflag == 0);
206 	ASSERT(dbp->db_struioun.cksum.flags == 0);
207 
208 	if ((msg_size & PAGEOFFSET) == 0) {
209 		kmem_free(dbp->db_base, msg_size);
210 	}
211 
212 	kmem_cache_free(mblk_cache, dbp->db_mblk);
213 }
214 
215 static void
216 bcache_dblk_destructor(void *buf, void *cdrarg)
217 {
218 	dblk_t *dbp = buf;
219 	bcache_t *bcp = cdrarg;
220 
221 	kmem_cache_free(bcp->buffer_cache, dbp->db_base);
222 
223 	ASSERT(dbp->db_mblk->b_datap == dbp);
224 	ASSERT(dbp->db_struioflag == 0);
225 	ASSERT(dbp->db_struioun.cksum.flags == 0);
226 
227 	kmem_cache_free(mblk_cache, dbp->db_mblk);
228 }
229 
/*
 * KMEM_SLAB_T_SZ is used only by an ASSERT in streams_msg_init(), so it
 * is defined for DEBUG builds only.  In the kernel it is the real size
 * of kmem_slab_t; outside the kernel a fixed stand-in value is used.
 */
#ifdef	DEBUG
#ifdef	_KERNEL
#define	KMEM_SLAB_T_SZ	sizeof (kmem_slab_t)
#else	/* _KERNEL */
#define	KMEM_SLAB_T_SZ	64	/* fakekernel */
#endif	/* _KERNEL */
#endif	/* DEBUG */
238 
239 void
240 streams_msg_init(void)
241 {
242 	char name[40];
243 	size_t size;
244 	size_t lastsize = DBLK_MIN_SIZE;
245 	size_t *sizep;
246 	struct kmem_cache *cp;
247 	size_t tot_size;
248 	int offset;
249 
250 	mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32,
251 	    NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags);
252 
253 	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
254 
255 		if ((offset = (size & PAGEOFFSET)) != 0) {
256 			/*
257 			 * We are in the middle of a page, dblk should
258 			 * be allocated on the same page
259 			 */
260 			tot_size = size + sizeof (dblk_t);
261 			ASSERT((offset + sizeof (dblk_t) + KMEM_SLAB_T_SZ)
262 			    < PAGESIZE);
263 			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
264 
265 		} else {
266 
267 			/*
268 			 * buf size is multiple of page size, dblk and
269 			 * buffer are allocated separately.
270 			 */
271 
272 			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
273 			tot_size = sizeof (dblk_t);
274 		}
275 
276 		(void) sprintf(name, "streams_dblk_%ld", (long)size);
277 		cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN,
278 		    dblk_constructor, dblk_destructor, NULL, (void *)(size),
279 		    NULL, dblk_kmem_flags);
280 
281 		while (lastsize <= size) {
282 			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
283 			lastsize += DBLK_MIN_SIZE;
284 		}
285 	}
286 
287 	dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t),
288 	    DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL,
289 	    (void *)sizeof (dblk_t), NULL, dblk_kmem_flags);
290 
291 	/* fthdr_cache, ftblk_cache, mmd_init... */
292 }
293 
294 /*ARGSUSED*/
295 mblk_t *
296 allocb(size_t size, uint_t pri)
297 {
298 	dblk_t *dbp;
299 	mblk_t *mp;
300 	size_t index;
301 
302 	index =  (size - 1)  >> DBLK_SIZE_SHIFT;
303 
304 	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
305 		if (size != 0) {
306 			mp = allocb_oversize(size, KM_NOSLEEP);
307 			goto out;
308 		}
309 		index = 0;
310 	}
311 
312 	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
313 		mp = NULL;
314 		goto out;
315 	}
316 
317 	mp = dbp->db_mblk;
318 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
319 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
320 	mp->b_rptr = mp->b_wptr = dbp->db_base;
321 	mp->b_queue = NULL;
322 	MBLK_BAND_FLAG_WORD(mp) = 0;
323 	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
324 out:
325 	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);
326 
327 	return (mp);
328 }
329 
330 /*
331  * Allocate an mblk taking db_credp and db_cpid from the template.
332  * Allow the cred to be NULL.
333  */
334 mblk_t *
335 allocb_tmpl(size_t size, const mblk_t *tmpl)
336 {
337 	mblk_t *mp = allocb(size, 0);
338 
339 	if (mp != NULL) {
340 		dblk_t *src = tmpl->b_datap;
341 		dblk_t *dst = mp->b_datap;
342 		cred_t *cr;
343 		pid_t cpid;
344 
345 		cr = msg_getcred(tmpl, &cpid);
346 		if (cr != NULL)
347 			crhold(dst->db_credp = cr);
348 		dst->db_cpid = cpid;
349 		dst->db_type = src->db_type;
350 	}
351 	return (mp);
352 }
353 
354 mblk_t *
355 allocb_cred(size_t size, cred_t *cr, pid_t cpid)
356 {
357 	mblk_t *mp = allocb(size, 0);
358 
359 	ASSERT(cr != NULL);
360 	if (mp != NULL) {
361 		dblk_t *dbp = mp->b_datap;
362 
363 		crhold(dbp->db_credp = cr);
364 		dbp->db_cpid = cpid;
365 	}
366 	return (mp);
367 }
368 
369 mblk_t *
370 allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid)
371 {
372 	mblk_t *mp = allocb_wait(size, 0, flags, error);
373 
374 	ASSERT(cr != NULL);
375 	if (mp != NULL) {
376 		dblk_t *dbp = mp->b_datap;
377 
378 		crhold(dbp->db_credp = cr);
379 		dbp->db_cpid = cpid;
380 	}
381 
382 	return (mp);
383 }
384 
385 /*
386  * Extract the db_cred (and optionally db_cpid) from a message.
387  * We find the first mblk which has a non-NULL db_cred and use that.
388  * If none found we return NULL.
389  * Does NOT get a hold on the cred.
390  */
391 cred_t *
392 msg_getcred(const mblk_t *mp, pid_t *cpidp)
393 {
394 	cred_t *cr = NULL;
395 
396 	while (mp != NULL) {
397 		dblk_t *dbp = mp->b_datap;
398 
399 		cr = dbp->db_credp;
400 		if (cr == NULL) {
401 			mp = mp->b_cont;
402 			continue;
403 		}
404 		if (cpidp != NULL)
405 			*cpidp = dbp->db_cpid;
406 
407 		/* DEBUG check for only one db_credp */
408 		return (cr);
409 	}
410 	if (cpidp != NULL)
411 		*cpidp = NOPID;
412 	return (NULL);
413 }
414 
415 /*
416  * Variant of msg_getcred which, when a cred is found
417  * 1. Returns with a hold on the cred
418  * 2. Clears the first cred in the mblk.
419  * This is more efficient to use than a msg_getcred() + crhold() when
420  * the message is freed after the cred has been extracted.
421  *
422  * The caller is responsible for ensuring that there is no other reference
423  * on the message since db_credp can not be cleared when there are other
424  * references.
425  */
426 cred_t *
427 msg_extractcred(mblk_t *mp, pid_t *cpidp)
428 {
429 	cred_t *cr = NULL;
430 
431 	while (mp != NULL) {
432 		dblk_t *dbp = mp->b_datap;
433 
434 		cr = dbp->db_credp;
435 		if (cr == NULL) {
436 			mp = mp->b_cont;
437 			continue;
438 		}
439 		ASSERT(dbp->db_ref == 1);
440 		dbp->db_credp = NULL;
441 		if (cpidp != NULL)
442 			*cpidp = dbp->db_cpid;
443 
444 		/* DEBUG check for only one db_credp */
445 		return (cr);
446 	}
447 	return (NULL);
448 }
449 
450 /* _KERNEL msg_getlabel() */
451 
452 void
453 freeb(mblk_t *mp)
454 {
455 	dblk_t *dbp = mp->b_datap;
456 
457 	ASSERT(dbp->db_ref > 0);
458 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
459 	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
460 
461 	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
462 
463 	dbp->db_free(mp, dbp);
464 }
465 
466 void
467 freemsg(mblk_t *mp)
468 {
469 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
470 	while (mp) {
471 		dblk_t *dbp = mp->b_datap;
472 		mblk_t *mp_cont = mp->b_cont;
473 
474 		ASSERT(dbp->db_ref > 0);
475 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
476 
477 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
478 
479 		dbp->db_free(mp, dbp);
480 		mp = mp_cont;
481 	}
482 }
483 
484 /*
485  * Reallocate a block for another use.  Try hard to use the old block.
486  * If the old data is wanted (copy), leave b_wptr at the end of the data,
487  * otherwise return b_wptr = b_rptr.
488  *
489  * This routine is private and unstable.
490  */
491 mblk_t	*
492 reallocb(mblk_t *mp, size_t size, uint_t copy)
493 {
494 	mblk_t		*mp1;
495 	unsigned char	*old_rptr;
496 	ptrdiff_t	cur_size;
497 
498 	if (mp == NULL)
499 		return (allocb(size, BPRI_HI));
500 
501 	cur_size = mp->b_wptr - mp->b_rptr;
502 	old_rptr = mp->b_rptr;
503 
504 	ASSERT(mp->b_datap->db_ref != 0);
505 
506 	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
507 		/*
508 		 * If the data is wanted and it will fit where it is, no
509 		 * work is required.
510 		 */
511 		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
512 			return (mp);
513 
514 		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
515 		mp1 = mp;
516 	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
517 		/* XXX other mp state could be copied too, db_flags ... ? */
518 		mp1->b_cont = mp->b_cont;
519 	} else {
520 		return (NULL);
521 	}
522 
523 	if (copy) {
524 		bcopy(old_rptr, mp1->b_rptr, cur_size);
525 		mp1->b_wptr = mp1->b_rptr + cur_size;
526 	}
527 
528 	if (mp != mp1)
529 		freeb(mp);
530 
531 	return (mp1);
532 }
533 
534 static void
535 dblk_lastfree(mblk_t *mp, dblk_t *dbp)
536 {
537 	ASSERT(dbp->db_mblk == mp);
538 	if (dbp->db_fthdr != NULL)
539 		str_ftfree(dbp);
540 
541 	/* set credp and projid to be 'unspecified' before returning to cache */
542 	if (dbp->db_credp != NULL) {
543 		crfree(dbp->db_credp);
544 		dbp->db_credp = NULL;
545 	}
546 	dbp->db_cpid = -1;
547 
548 	/* Reset the struioflag and the checksum flag fields */
549 	dbp->db_struioflag = 0;
550 	dbp->db_struioun.cksum.flags = 0;
551 
552 	/* and the COOKED and/or UIOA flag(s) */
553 	dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);
554 
555 	kmem_cache_free(dbp->db_cache, dbp);
556 }
557 
/*
 * db_free routine installed by dupb() once a dblk may be shared.
 * Drops one reference.  If other references remain, only the mblk 'mp'
 * is returned to mblk_cache.  If this was the last reference, 'mp'
 * becomes the dblk's companion mblk and the dblk's db_lastfree routine
 * is invoked to release the dblk itself.
 */
static void
dblk_decref(mblk_t *mp, dblk_t *dbp)
{
	/* db_ref == 1 means we are the sole owner; no atomic op is needed */
	if (dbp->db_ref != 1) {
		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
		/*
		 * atomic_add_32_nv() just decremented db_ref, so we no longer
		 * have a reference to the dblk, which means another thread
		 * could free it.  Therefore we cannot examine the dblk to
		 * determine whether ours was the last reference.  Instead,
		 * we extract the new and minimum reference counts from rtfu.
		 * Note that all we're really saying is "if (ref != refmin)".
		 */
		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
			kmem_cache_free(mblk_cache, mp);
			return;
		}
	}
	/*
	 * Last reference: the surviving mblk may not be the one the dblk
	 * was constructed with, so re-pair them before the final free
	 * (the lastfree routines assert db_mblk == mp).
	 */
	dbp->db_mblk = mp;
	dbp->db_free = dbp->db_lastfree;
	dbp->db_lastfree(mp, dbp);
}
582 
/*
 * Duplicate a message block: allocate a new mblk that shares mp's data
 * block and bump the dblk reference count.  The new mblk copies mp's
 * b_rptr/b_wptr and its band/flag word.  Returns NULL if no mblk can be
 * allocated or if db_ref has already reached its maximum.
 */
mblk_t *
dupb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;
	mblk_t *new_mp;
	uint32_t oldrtfu, newrtfu;

	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
		goto out;

	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
	new_mp->b_rptr = mp->b_rptr;
	new_mp->b_wptr = mp->b_wptr;
	new_mp->b_datap = dbp;
	new_mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);

	/*
	 * From now on frees must go through the reference-counting path.
	 * This is set before the count is raised and stays set even if
	 * the dup is refused below; dblk_decref() handles db_ref == 1.
	 */
	dbp->db_free = dblk_decref;
	/* compare-and-swap loop: retry until db_ref is bumped atomically */
	do {
		ASSERT(dbp->db_ref > 0);
		oldrtfu = DBLK_RTFU_WORD(dbp);
		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
		/*
		 * If db_ref is maxed out we can't dup this message anymore.
		 */
		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
			kmem_cache_free(mblk_cache, new_mp);
			new_mp = NULL;
			goto out;
		}
	} while (atomic_cas_32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) !=
	    oldrtfu);

out:
	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
	return (new_mp);
}
622 
/*
 * Do-nothing free callback; it is the function stored in the global
 * 'frnop' free-routine descriptor.
 */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
}
628 
629 /*
630  * Generic esballoc used to implement the four flavors: [d]esballoc[a].
631  * and allocb_oversize
632  */
633 static mblk_t *
634 gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
635 	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
636 {
637 	dblk_t *dbp;
638 	mblk_t *mp;
639 
640 	ASSERT(base != NULL && frp != NULL);
641 
642 	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
643 		mp = NULL;
644 		goto out;
645 	}
646 
647 	mp = dbp->db_mblk;
648 	dbp->db_base = base;
649 	dbp->db_lim = base + size;
650 	dbp->db_free = dbp->db_lastfree = lastfree;
651 	dbp->db_frtnp = frp;
652 	DBLK_RTFU_WORD(dbp) = db_rtfu;
653 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
654 	mp->b_rptr = mp->b_wptr = base;
655 	mp->b_queue = NULL;
656 	MBLK_BAND_FLAG_WORD(mp) = 0;
657 
658 out:
659 	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
660 	return (mp);
661 }
662 
663 static void
664 bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
665 {
666 	bcache_t *bcp = dbp->db_cache;
667 
668 	ASSERT(dbp->db_mblk == mp);
669 	if (dbp->db_fthdr != NULL)
670 		str_ftfree(dbp);
671 
672 	/* set credp and projid to be 'unspecified' before returning to cache */
673 	if (dbp->db_credp != NULL) {
674 		crfree(dbp->db_credp);
675 		dbp->db_credp = NULL;
676 	}
677 	dbp->db_cpid = -1;
678 	dbp->db_struioflag = 0;
679 	dbp->db_struioun.cksum.flags = 0;
680 
681 	mutex_enter(&bcp->mutex);
682 	kmem_cache_free(bcp->dblk_cache, dbp);
683 	bcp->alloc--;
684 
685 	if (bcp->alloc == 0 && bcp->destroy != 0) {
686 		kmem_cache_destroy(bcp->dblk_cache);
687 		kmem_cache_destroy(bcp->buffer_cache);
688 		mutex_exit(&bcp->mutex);
689 		mutex_destroy(&bcp->mutex);
690 		kmem_free(bcp, sizeof (bcache_t));
691 	} else {
692 		mutex_exit(&bcp->mutex);
693 	}
694 }
695 
696 bcache_t *
697 bcache_create(char *name, size_t size, uint_t align)
698 {
699 	bcache_t *bcp;
700 	char buffer[255];
701 
702 	ASSERT((align & (align - 1)) == 0);
703 
704 	if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL)
705 		return (NULL);
706 
707 	bcp->size = size;
708 	bcp->align = align;
709 	bcp->alloc = 0;
710 	bcp->destroy = 0;
711 
712 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
713 
714 	(void) sprintf(buffer, "%s_buffer_cache", name);
715 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
716 	    NULL, NULL, NULL, 0);
717 	(void) sprintf(buffer, "%s_dblk_cache", name);
718 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
719 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
720 	    NULL, (void *)bcp, NULL, 0);
721 
722 	return (bcp);
723 }
724 
725 void
726 bcache_destroy(bcache_t *bcp)
727 {
728 	ASSERT(bcp != NULL);
729 
730 	mutex_enter(&bcp->mutex);
731 	if (bcp->alloc == 0) {
732 		kmem_cache_destroy(bcp->dblk_cache);
733 		kmem_cache_destroy(bcp->buffer_cache);
734 		mutex_exit(&bcp->mutex);
735 		mutex_destroy(&bcp->mutex);
736 		kmem_free(bcp, sizeof (bcache_t));
737 	} else {
738 		bcp->destroy++;
739 		mutex_exit(&bcp->mutex);
740 	}
741 }
742 
743 /*ARGSUSED*/
744 mblk_t *
745 bcache_allocb(bcache_t *bcp, uint_t pri)
746 {
747 	dblk_t *dbp;
748 	mblk_t *mp = NULL;
749 
750 	ASSERT(bcp != NULL);
751 
752 	mutex_enter(&bcp->mutex);
753 	if (bcp->destroy != 0) {
754 		mutex_exit(&bcp->mutex);
755 		goto out;
756 	}
757 
758 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
759 		mutex_exit(&bcp->mutex);
760 		goto out;
761 	}
762 	bcp->alloc++;
763 	mutex_exit(&bcp->mutex);
764 
765 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
766 
767 	mp = dbp->db_mblk;
768 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
769 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
770 	mp->b_rptr = mp->b_wptr = dbp->db_base;
771 	mp->b_queue = NULL;
772 	MBLK_BAND_FLAG_WORD(mp) = 0;
773 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
774 out:
775 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
776 
777 	return (mp);
778 }
779 
780 static void
781 dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
782 {
783 	ASSERT(dbp->db_mblk == mp);
784 	if (dbp->db_fthdr != NULL)
785 		str_ftfree(dbp);
786 
787 	/* set credp and projid to be 'unspecified' before returning to cache */
788 	if (dbp->db_credp != NULL) {
789 		crfree(dbp->db_credp);
790 		dbp->db_credp = NULL;
791 	}
792 	dbp->db_cpid = -1;
793 	dbp->db_struioflag = 0;
794 	dbp->db_struioun.cksum.flags = 0;
795 
796 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
797 	kmem_cache_free(dbp->db_cache, dbp);
798 }
799 
800 static mblk_t *
801 allocb_oversize(size_t size, int kmflags)
802 {
803 	mblk_t *mp;
804 	void *buf;
805 
806 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
807 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
808 		return (NULL);
809 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
810 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
811 		kmem_free(buf, size);
812 
813 	if (mp != NULL)
814 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
815 
816 	return (mp);
817 }
818 
819 mblk_t *
820 allocb_tryhard(size_t target_size)
821 {
822 	size_t size;
823 	mblk_t *bp;
824 
825 	for (size = target_size; size < target_size + 512;
826 	    size += DBLK_CACHE_ALIGN)
827 		if ((bp = allocb(size, BPRI_HI)) != NULL)
828 			return (bp);
829 	allocb_tryhard_fails++;
830 	return (NULL);
831 }
832 
833 /*
834  * This routine is consolidation private for STREAMS internal use
835  * This routine may only be called from sync routines (i.e., not
836  * from put or service procedures).  It is located here (rather
837  * than strsubr.c) so that we don't have to expose all of the
838  * allocb() implementation details in header files.
839  */
840 mblk_t *
841 allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
842 {
843 	dblk_t *dbp;
844 	mblk_t *mp;
845 	size_t index;
846 
847 	index = (size -1) >> DBLK_SIZE_SHIFT;
848 
849 	if (flags & STR_NOSIG) {
850 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
851 			if (size != 0) {
852 				mp = allocb_oversize(size, KM_SLEEP);
853 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
854 				    (uintptr_t)mp);
855 				return (mp);
856 			}
857 			index = 0;
858 		}
859 
860 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
861 		mp = dbp->db_mblk;
862 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
863 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
864 		mp->b_rptr = mp->b_wptr = dbp->db_base;
865 		mp->b_queue = NULL;
866 		MBLK_BAND_FLAG_WORD(mp) = 0;
867 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
868 
869 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
870 
871 	} else {
872 		while ((mp = allocb(size, pri)) == NULL) {
873 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
874 				return (NULL);
875 		}
876 	}
877 
878 	return (mp);
879 }
880 
881 /*
882  * Call function 'func' with 'arg' when a class zero block can
883  * be allocated with priority 'pri'.
884  */
885 bufcall_id_t
886 esbbcall(uint_t pri, void (*func)(void *), void *arg)
887 {
888 	return (bufcall(1, pri, func, arg));
889 }
890 
891 /*
892  * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
893  * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
894  * This provides consistency for all internal allocators of ioctl.
895  */
896 mblk_t *
897 mkiocb(uint_t cmd)
898 {
899 	struct iocblk	*ioc;
900 	mblk_t		*mp;
901 
902 	/*
903 	 * Allocate enough space for any of the ioctl related messages.
904 	 */
905 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
906 		return (NULL);
907 
908 	bzero(mp->b_rptr, sizeof (union ioctypes));
909 
910 	/*
911 	 * Set the mblk_t information and ptrs correctly.
912 	 */
913 	mp->b_wptr += sizeof (struct iocblk);
914 	mp->b_datap->db_type = M_IOCTL;
915 
916 	/*
917 	 * Fill in the fields.
918 	 */
919 	ioc		= (struct iocblk *)mp->b_rptr;
920 	ioc->ioc_cmd	= cmd;
921 	ioc->ioc_cr	= kcred;
922 	ioc->ioc_id	= getiocseqno();
923 	ioc->ioc_flag	= IOC_NATIVE;
924 	return (mp);
925 }
926 
927 /*
928  * test if block of given size can be allocated with a request of
929  * the given priority.
930  * 'pri' is no longer used, but is retained for compatibility.
931  */
932 /* ARGSUSED */
933 int
934 testb(size_t size, uint_t pri)
935 {
936 	return ((size + sizeof (dblk_t)) <= kmem_avail());
937 }
938 
939 /* _KERNEL: bufcall, unbufcall */
940 
941 /*
942  * Duplicate a message block by block (uses dupb), returning
943  * a pointer to the duplicate message.
944  * Returns a non-NULL value only if the entire message
945  * was dup'd.
946  */
947 mblk_t *
948 dupmsg(mblk_t *bp)
949 {
950 	mblk_t *head, *nbp;
951 
952 	if (!bp || !(nbp = head = dupb(bp)))
953 		return (NULL);
954 
955 	while (bp->b_cont) {
956 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
957 			freemsg(head);
958 			return (NULL);
959 		}
960 		nbp = nbp->b_cont;
961 		bp = bp->b_cont;
962 	}
963 	return (head);
964 }
965 
966 #define	DUPB_NOLOAN(bp) \
967 	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
968 	copyb((bp)) : dupb((bp)))
969 
970 mblk_t *
971 dupmsg_noloan(mblk_t *bp)
972 {
973 	mblk_t *head, *nbp;
974 
975 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
976 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
977 		return (NULL);
978 
979 	while (bp->b_cont) {
980 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
981 			freemsg(head);
982 			return (NULL);
983 		}
984 		nbp = nbp->b_cont;
985 		bp = bp->b_cont;
986 	}
987 	return (head);
988 }
989 
990 /*
991  * Copy data from message and data block to newly allocated message and
992  * data block. Returns new message block pointer, or NULL if error.
993  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
994  * as in the original even when db_base is not word aligned. (bug 1052877)
995  */
996 mblk_t *
997 copyb(mblk_t *bp)
998 {
999 	mblk_t	*nbp;
1000 	dblk_t	*dp, *ndp;
1001 	uchar_t *base;
1002 	size_t	size;
1003 	size_t	unaligned;
1004 
1005 	ASSERT(bp->b_wptr >= bp->b_rptr);
1006 
1007 	dp = bp->b_datap;
1008 	if (dp->db_fthdr != NULL)
1009 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
1010 
1011 	/*
1012 	 * Special handling for Multidata message; this should be
1013 	 * removed once a copy-callback routine is made available.
1014 	 */
1015 	if (dp->db_type == M_MULTIDATA) {
1016 		/* _KERNEL mmd_copy stuff */
1017 		return (NULL);
1018 	}
1019 
1020 	size = dp->db_lim - dp->db_base;
1021 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
1022 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
1023 		return (NULL);
1024 	nbp->b_flag = bp->b_flag;
1025 	nbp->b_band = bp->b_band;
1026 	ndp = nbp->b_datap;
1027 
1028 	/*
1029 	 * Well, here is a potential issue.  If we are trying to
1030 	 * trace a flow, and we copy the message, we might lose
1031 	 * information about where this message might have been.
1032 	 * So we should inherit the FT data.  On the other hand,
1033 	 * a user might be interested only in alloc to free data.
1034 	 * So I guess the real answer is to provide a tunable.
1035 	 */
1036 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
1037 
1038 	base = ndp->db_base + unaligned;
1039 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
1040 
1041 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
1042 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
1043 
1044 	return (nbp);
1045 }
1046 
1047 /*
1048  * Copy data from message to newly allocated message using new
1049  * data blocks.  Returns a pointer to the new message, or NULL if error.
1050  */
1051 mblk_t *
1052 copymsg(mblk_t *bp)
1053 {
1054 	mblk_t *head, *nbp;
1055 
1056 	if (!bp || !(nbp = head = copyb(bp)))
1057 		return (NULL);
1058 
1059 	while (bp->b_cont) {
1060 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
1061 			freemsg(head);
1062 			return (NULL);
1063 		}
1064 		nbp = nbp->b_cont;
1065 		bp = bp->b_cont;
1066 	}
1067 	return (head);
1068 }
1069 
1070 /*
1071  * link a message block to tail of message
1072  */
1073 void
1074 linkb(mblk_t *mp, mblk_t *bp)
1075 {
1076 	ASSERT(mp && bp);
1077 
1078 	for (; mp->b_cont; mp = mp->b_cont)
1079 		;
1080 	mp->b_cont = bp;
1081 }
1082 
1083 /*
1084  * unlink a message block from head of message
1085  * return pointer to new message.
1086  * NULL if message becomes empty.
1087  */
1088 mblk_t *
1089 unlinkb(mblk_t *bp)
1090 {
1091 	mblk_t *bp1;
1092 
1093 	bp1 = bp->b_cont;
1094 	bp->b_cont = NULL;
1095 	return (bp1);
1096 }
1097 
1098 /*
1099  * remove a message block "bp" from message "mp"
1100  *
1101  * Return pointer to new message or NULL if no message remains.
1102  * Return -1 if bp is not found in message.
1103  */
1104 mblk_t *
1105 rmvb(mblk_t *mp, mblk_t *bp)
1106 {
1107 	mblk_t *tmp;
1108 	mblk_t *lastp = NULL;
1109 
1110 	ASSERT(mp && bp);
1111 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
1112 		if (tmp == bp) {
1113 			if (lastp)
1114 				lastp->b_cont = tmp->b_cont;
1115 			else
1116 				mp = tmp->b_cont;
1117 			tmp->b_cont = NULL;
1118 			return (mp);
1119 		}
1120 		lastp = tmp;
1121 	}
1122 	return ((mblk_t *)-1);
1123 }
1124 
/*
 * Concatenate and align the first len bytes of a message in place.
 * Len == -1 means concatenate everything that xmsgsize() reports.
 * Returns 1 on success, 0 on failure (M_MULTIDATA message, fewer than
 * len bytes available per xmsgsize(), or allocation failure).
 * After the pullup, the caller's mp still heads the message and now
 * points at the pulled up data; blocks that were fully consumed have
 * been freed and any remainder hangs off mp->b_cont.
 * NOTE(review): xmsgsize() and str_aligned() are defined elsewhere;
 * presumably they return the byte count of the leading same-type
 * blocks and test pointer alignment respectively -- confirm.
 */
int
pullupmsg(mblk_t *mp, ssize_t len)
{
	mblk_t *bp, *b_cont;
	dblk_t *dbp;
	ssize_t n;

	ASSERT(mp->b_datap->db_ref > 0);
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

	/*
	 * We won't handle Multidata message, since it contains
	 * metadata which this function has no knowledge of; we
	 * assert on DEBUG, and return failure otherwise.
	 */
	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
	if (mp->b_datap->db_type == M_MULTIDATA)
		return (0);

	if (len == -1) {
		/* a single, already aligned block needs no work */
		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
			return (1);
		len = xmsgsize(mp);
	} else {
		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
		ASSERT(first_mblk_len >= 0);
		/*
		 * If the length is less than that of the first mblk,
		 * we want to pull up the message into an aligned mblk.
		 * Though not part of the spec, some callers assume it.
		 */
		if (len <= first_mblk_len) {
			if (str_aligned(mp->b_rptr))
				return (1);
			len = first_mblk_len;
		} else if (xmsgsize(mp) < len)
			return (0);
	}

	if ((bp = allocb_tmpl(len, mp)) == NULL)
		return (0);

	/*
	 * Exchange the roles of the two mblks so that the caller's pointer
	 * (mp) ends up owning the freshly allocated dblk, while bp takes
	 * over the original contents.  Both dblks' db_mblk back-pointers
	 * are fixed up to match.
	 */
	dbp = bp->b_datap;
	*bp = *mp;		/* swap mblks so bp heads the old msg... */
	mp->b_datap = dbp;	/* ... and mp heads the new message */
	mp->b_datap->db_mblk = mp;
	bp->b_datap->db_mblk = bp;
	mp->b_rptr = mp->b_wptr = dbp->db_base;

	/*
	 * Copy up to len bytes from the old chain into mp, freeing each
	 * old block once it has been completely drained.
	 */
	do {
		ASSERT(bp->b_datap->db_ref > 0);
		ASSERT(bp->b_wptr >= bp->b_rptr);
		n = MIN(bp->b_wptr - bp->b_rptr, len);
		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
		if (n > 0)
			bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
		mp->b_wptr += n;
		bp->b_rptr += n;
		len -= n;
		/* partially consumed block: it stays at the head of the rest */
		if (bp->b_rptr != bp->b_wptr)
			break;
		b_cont = bp->b_cont;
		freeb(bp);
		bp = b_cont;
	} while (len && bp);

	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */

	return (1);
}
1201 
1202 /*
1203  * Concatenate and align at least the first len bytes of common message
1204  * type.  Len == -1 means concatenate everything.  The original message is
1205  * unaltered.  Returns a pointer to a new message on success, otherwise
1206  * returns NULL.
1207  */
1208 mblk_t *
1209 msgpullup(mblk_t *mp, ssize_t len)
1210 {
1211 	mblk_t	*newmp;
1212 	ssize_t	totlen;
1213 	ssize_t	n;
1214 
1215 	/*
1216 	 * We won't handle Multidata message, since it contains
1217 	 * metadata which this function has no knowledge of; we
1218 	 * assert on DEBUG, and return failure otherwise.
1219 	 */
1220 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
1221 	if (mp->b_datap->db_type == M_MULTIDATA)
1222 		return (NULL);
1223 
1224 	totlen = xmsgsize(mp);
1225 
1226 	if ((len > 0) && (len > totlen))
1227 		return (NULL);
1228 
1229 	/*
1230 	 * Copy all of the first msg type into one new mblk, then dupmsg
1231 	 * and link the rest onto this.
1232 	 */
1233 
1234 	len = totlen;
1235 
1236 	if ((newmp = allocb_tmpl(len, mp)) == NULL)
1237 		return (NULL);
1238 
1239 	newmp->b_flag = mp->b_flag;
1240 	newmp->b_band = mp->b_band;
1241 
1242 	while (len > 0) {
1243 		n = mp->b_wptr - mp->b_rptr;
1244 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
1245 		if (n > 0)
1246 			bcopy(mp->b_rptr, newmp->b_wptr, n);
1247 		newmp->b_wptr += n;
1248 		len -= n;
1249 		mp = mp->b_cont;
1250 	}
1251 
1252 	if (mp != NULL) {
1253 		newmp->b_cont = dupmsg(mp);
1254 		if (newmp->b_cont == NULL) {
1255 			freemsg(newmp);
1256 			return (NULL);
1257 		}
1258 	}
1259 
1260 	return (newmp);
1261 }
1262 
1263 /*
1264  * Trim bytes from message
1265  *  len > 0, trim from head
1266  *  len < 0, trim from tail
1267  * Returns 1 on success, 0 on failure.
1268  */
1269 int
1270 adjmsg(mblk_t *mp, ssize_t len)
1271 {
1272 	mblk_t *bp;
1273 	mblk_t *save_bp = NULL;
1274 	mblk_t *prev_bp;
1275 	mblk_t *bcont;
1276 	unsigned char type;
1277 	ssize_t n;
1278 	int fromhead;
1279 	int first;
1280 
1281 	ASSERT(mp != NULL);
1282 	/*
1283 	 * We won't handle Multidata message, since it contains
1284 	 * metadata which this function has no knowledge of; we
1285 	 * assert on DEBUG, and return failure otherwise.
1286 	 */
1287 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
1288 	if (mp->b_datap->db_type == M_MULTIDATA)
1289 		return (0);
1290 
1291 	if (len < 0) {
1292 		fromhead = 0;
1293 		len = -len;
1294 	} else {
1295 		fromhead = 1;
1296 	}
1297 
1298 	if (xmsgsize(mp) < len)
1299 		return (0);
1300 
1301 	if (fromhead) {
1302 		first = 1;
1303 		while (len) {
1304 			ASSERT(mp->b_wptr >= mp->b_rptr);
1305 			n = MIN(mp->b_wptr - mp->b_rptr, len);
1306 			mp->b_rptr += n;
1307 			len -= n;
1308 
1309 			/*
1310 			 * If this is not the first zero length
1311 			 * message remove it
1312 			 */
1313 			if (!first && (mp->b_wptr == mp->b_rptr)) {
1314 				bcont = mp->b_cont;
1315 				freeb(mp);
1316 				mp = save_bp->b_cont = bcont;
1317 			} else {
1318 				save_bp = mp;
1319 				mp = mp->b_cont;
1320 			}
1321 			first = 0;
1322 		}
1323 	} else {
1324 		type = mp->b_datap->db_type;
1325 		while (len) {
1326 			bp = mp;
1327 			save_bp = NULL;
1328 
1329 			/*
1330 			 * Find the last message of same type
1331 			 */
1332 			while (bp && bp->b_datap->db_type == type) {
1333 				ASSERT(bp->b_wptr >= bp->b_rptr);
1334 				prev_bp = save_bp;
1335 				save_bp = bp;
1336 				bp = bp->b_cont;
1337 			}
1338 			if (save_bp == NULL)
1339 				break;
1340 			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
1341 			save_bp->b_wptr -= n;
1342 			len -= n;
1343 
1344 			/*
1345 			 * If this is not the first message
1346 			 * and we have taken away everything
1347 			 * from this message, remove it
1348 			 */
1349 
1350 			if ((save_bp != mp) &&
1351 			    (save_bp->b_wptr == save_bp->b_rptr)) {
1352 				bcont = save_bp->b_cont;
1353 				freeb(save_bp);
1354 				prev_bp->b_cont = bcont;
1355 			}
1356 		}
1357 	}
1358 	return (1);
1359 }
1360 
1361 /*
1362  * get number of data bytes in message
1363  */
1364 size_t
1365 msgdsize(mblk_t *bp)
1366 {
1367 	size_t count = 0;
1368 
1369 	for (; bp; bp = bp->b_cont)
1370 		if (bp->b_datap->db_type == M_DATA) {
1371 			ASSERT(bp->b_wptr >= bp->b_rptr);
1372 			count += bp->b_wptr - bp->b_rptr;
1373 		}
1374 	return (count);
1375 }
1376 
1377 /* getq() etc to EOF removed */
1378