1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/socket.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 
37 #include <netinet/in.h>
38 #include <netinet/ip6.h>
39 #include <netinet/tcp_seq.h>
40 #include <netinet/sctp.h>
41 
42 #include <inet/common.h>
43 #include <inet/ip.h>
44 #include <inet/ip_if.h>
45 #include <inet/ip6.h>
46 #include <inet/mib2.h>
47 #include <inet/ipclassifier.h>
48 #include <inet/ipp_common.h>
49 #include <inet/ipsec_impl.h>
50 #include <inet/sctp_ip.h>
51 
52 #include "sctp_impl.h"
53 #include "sctp_asconf.h"
54 #include "sctp_addr.h"
55 
56 static struct kmem_cache *sctp_kmem_set_cache;
57 
58 /*
59  * PR-SCTP comments.
60  *
61  * When we get a valid Forward TSN chunk, we check the fragment list for this
62  * SSN and preceeding SSNs free all them. Further, if this Forward TSN causes
63  * the next expected SSN to be present in the stream queue, we deliver any
64  * such stranded messages upstream. We also update the SACK info. appropriately.
65  * When checking for advancing the cumulative ack (in sctp_cumack()) we must
66  * check for abandoned chunks and messages. While traversing the tramsmit
67  * list if we come across an abandoned chunk, we can skip the message (i.e.
68  * take it out of the (re)transmit list) since this message, and hence this
69  * chunk, has been marked abandoned by sctp_rexmit(). If we come across an
70  * unsent chunk for a message this now abandoned we need to check if a
71  * Forward TSN needs to be sent, this could be a case where we deferred sending
72  * a Forward TSN in sctp_get_msg_to_send(). Further, after processing a
73  * SACK we check if the Advanced peer ack point can be moved ahead, i.e.
74  * if we can send a Forward TSN via sctp_check_abandoned_data().
75  */
76 void
sctp_free_set(sctp_set_t * s)77 sctp_free_set(sctp_set_t *s)
78 {
79 	sctp_set_t *p;
80 
81 	while (s) {
82 		p = s->next;
83 		kmem_cache_free(sctp_kmem_set_cache, s);
84 		s = p;
85 	}
86 }
87 
/*
 * Record "tsn" in the sorted, doubly linked list of SACK gap-ack sets
 * headed at *head, coalescing with adjacent sets where possible.  *num
 * tracks the number of sets on the list.  Allocation failures are
 * silently ignored (KM_NOSLEEP): dropping a gap entry only makes the
 * next SACK less precise, it does not affect correctness.
 */
static void
sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
{
	sctp_set_t *p, *t;

	if (head == NULL || num == NULL)
		return;

	ASSERT(*num >= 0);
	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));

	if (*head == NULL) {
		/* Empty list: create the first set covering just tsn. */
		*head = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
		if (*head == NULL)
			return;
		(*head)->prev = (*head)->next = NULL;
		(*head)->begin = tsn;
		(*head)->end = tsn;
		*num = 1;
		return;
	}

	ASSERT((*head)->prev == NULL);

	/*
	 * Handle this special case here so we don't have to check
	 * for it each time in the loop.
	 */
	if (SEQ_LT(tsn + 1, (*head)->begin)) {
		/* add a new set, and move the head pointer */
		t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
		if (t == NULL)
			return;
		t->next = *head;
		t->prev = NULL;
		(*head)->prev = t;
		t->begin = tsn;
		t->end = tsn;
		(*num)++;
		*head = t;
		return;
	}

	/*
	 * We need to handle the following cases, where p points to
	 * the current set (as we walk through the loop):
	 *
	 * 1. tsn is entirely less than p; create a new set before p.
	 * 2. tsn borders p from less; coalesce p with tsn.
	 * 3. tsn is within p; do nothing.
	 * 4. tsn borders p from greater; coalesce p with tsn.
	 * 4a. p may now border p->next from less; if so, coalesce those
	 *    two sets.
	 * 5. tsn is entirely greater than all sets; add a new set at
	 *    the end.
	 */
	for (p = *head; ; p = p->next) {
		if (SEQ_LT(tsn + 1, p->begin)) {
			/* 1: add a new set before p. */
			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
			if (t == NULL)
				return;
			t->next = p;
			t->prev = NULL;
			t->begin = tsn;
			t->end = tsn;
			if (p->prev) {
				t->prev = p->prev;
				p->prev->next = t;
			}
			p->prev = t;
			(*num)++;
			return;
		}

		if ((tsn + 1) == p->begin) {
			/* 2: adjust p->begin */
			p->begin = tsn;
			return;
		}

		if (SEQ_GEQ(tsn, p->begin) && SEQ_LEQ(tsn, p->end)) {
			/* 3; do nothing */
			return;
		}

		if ((p->end + 1) == tsn) {
			/* 4; adjust p->end */
			p->end = tsn;

			if (p->next != NULL && (tsn + 1) == p->next->begin) {
				/* 4a: coalesce p and p->next */
				t = p->next;
				p->end = t->end;
				p->next = t->next;
				if (t->next != NULL)
					t->next->prev = p;
				kmem_cache_free(sctp_kmem_set_cache, t);
				(*num)--;
			}
			return;
		}

		if (p->next == NULL) {
			/* 5: add new set at the end */
			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
			if (t == NULL)
				return;
			t->next = NULL;
			t->prev = p;
			t->begin = tsn;
			t->end = tsn;
			p->next = t;
			(*num)++;
			return;
		}

		/*
		 * tsn lies beyond p; keep walking.  (This guard is the
		 * only way to fall through to the next iteration.)
		 */
		if (SEQ_GT(tsn, p->end + 1))
			continue;
	}
}
209 
/*
 * Remove from the SACK gap-ack list headed at *head all TSNs up to and
 * including "end" (called when the cumulative ack point advances).
 * Sets lying entirely at or below "end" are freed; a set straddling
 * "end" is trimmed to keep only its tail.  *num is recomputed from the
 * surviving list afterwards.
 */
static void
sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
{
	sctp_set_t *p, *t;

	if (head == NULL || *head == NULL || num == NULL)
		return;

	/* Nothing to remove */
	if (SEQ_LT(end, (*head)->begin))
		return;

	/* Find out where to start removing sets */
	for (p = *head; p->next; p = p->next) {
		if (SEQ_LEQ(end, p->end))
			break;
	}

	if (SEQ_LT(end, p->end) && SEQ_GEQ(end, p->begin)) {
		/* "end" falls inside p: trim p to keep only its tail */
		p->begin = end + 1;
		/* all done */
		if (p == *head)
			return;
	} else if (SEQ_GEQ(end, p->end)) {
		/* p is entirely covered: remove this set too */
		p = p->next;
	}

	/* unlink everything before this set */
	t = *head;
	*head = p;
	if (p != NULL && p->prev != NULL) {
		p->prev->next = NULL;
		p->prev = NULL;
	}

	sctp_free_set(t);

	/* recount the number of sets */
	*num = 0;

	for (p = *head; p != NULL; p = p->next)
		(*num)++;
}
255 
/*
 * Create the kmem cache backing the SACK gap-ack sets.  No
 * constructor, destructor or reclaim callbacks are supplied since
 * sctp_set_t is plain data.
 */
void
sctp_sets_init()
{
	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
	    NULL, 0);
}
263 
/*
 * Tear down the SACK gap-ack set cache created by sctp_sets_init().
 */
void
sctp_sets_fini()
{
	kmem_cache_destroy(sctp_kmem_set_cache);
}
269 
270 sctp_chunk_hdr_t *
sctp_first_chunk(uchar_t * rptr,ssize_t remaining)271 sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
272 {
273 	sctp_chunk_hdr_t *ch;
274 	uint16_t ch_len;
275 
276 	if (remaining < sizeof (*ch)) {
277 		return (NULL);
278 	}
279 
280 	ch = (sctp_chunk_hdr_t *)rptr;
281 	ch_len = ntohs(ch->sch_len);
282 
283 	if (ch_len < sizeof (*ch) || remaining < ch_len) {
284 		return (NULL);
285 	}
286 
287 	return (ch);
288 }
289 
290 sctp_chunk_hdr_t *
sctp_next_chunk(sctp_chunk_hdr_t * ch,ssize_t * remaining)291 sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
292 {
293 	int pad;
294 	uint16_t ch_len;
295 
296 	if (!ch) {
297 		return (NULL);
298 	}
299 
300 	ch_len = ntohs(ch->sch_len);
301 
302 	if ((pad = ch_len & (SCTP_ALIGN - 1)) != 0) {
303 		pad = SCTP_ALIGN - pad;
304 	}
305 
306 	*remaining -= (ch_len + pad);
307 	ch = (sctp_chunk_hdr_t *)((char *)ch + ch_len + pad);
308 
309 	return (sctp_first_chunk((uchar_t *)ch, *remaining));
310 }
311 
312 /*
313  * Attach ancillary data to a received SCTP segments.
314  * If the source address (fp) is not the primary, send up a
315  * unitdata_ind so recvfrom() can populate the msg_name field.
316  * If ancillary data is also requested, we append it to the
317  * unitdata_req. Otherwise, we just send up an optdata_ind.
318  */
319 static int
sctp_input_add_ancillary(sctp_t * sctp,mblk_t ** mp,sctp_data_hdr_t * dcp,sctp_faddr_t * fp,ip_pkt_t * ipp,ip_recv_attr_t * ira)320 sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
321     sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
322 {
323 	struct T_unitdata_ind	*tudi;
324 	int			optlen;
325 	int			hdrlen;
326 	uchar_t			*optptr;
327 	struct cmsghdr		*cmsg;
328 	mblk_t			*mp1;
329 	struct sockaddr_in6	sin_buf[1];
330 	struct sockaddr_in6	*sin6;
331 	struct sockaddr_in	*sin4;
332 	crb_t			 addflag;	/* Which pieces to add */
333 	conn_t			*connp = sctp->sctp_connp;
334 
335 	sin4 = NULL;
336 	sin6 = NULL;
337 
338 	optlen = hdrlen = 0;
339 	addflag.crb_all = 0;
340 
341 	/* Figure out address size */
342 	if (connp->conn_family == AF_INET) {
343 		sin4 = (struct sockaddr_in *)sin_buf;
344 		sin4->sin_family = AF_INET;
345 		sin4->sin_port = connp->conn_fport;
346 		IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, sin4->sin_addr.s_addr);
347 		hdrlen = sizeof (*tudi) + sizeof (*sin4);
348 	} else {
349 		sin6 = sin_buf;
350 		sin6->sin6_family = AF_INET6;
351 		sin6->sin6_port = connp->conn_fport;
352 		sin6->sin6_addr = fp->sf_faddr;
353 		hdrlen = sizeof (*tudi) + sizeof (*sin6);
354 	}
355 	/* If app asked to receive send / recv info */
356 	if (sctp->sctp_recvsndrcvinfo)
357 		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);
358 
359 	if (connp->conn_recv_ancillary.crb_all == 0)
360 		goto noancillary;
361 
362 	if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
363 	    ira->ira_ruifindex != sctp->sctp_recvifindex) {
364 		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
365 		if (hdrlen == 0)
366 			hdrlen = sizeof (struct T_unitdata_ind);
367 		addflag.crb_ip_recvpktinfo = 1;
368 	}
369 	/* If app asked for hoplimit and it has changed ... */
370 	if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit &&
371 	    ipp->ipp_hoplimit != sctp->sctp_recvhops) {
372 		optlen += sizeof (*cmsg) + sizeof (uint_t);
373 		if (hdrlen == 0)
374 			hdrlen = sizeof (struct T_unitdata_ind);
375 		addflag.crb_ipv6_recvhoplimit = 1;
376 	}
377 	/* If app asked for tclass and it has changed ... */
378 	if (connp->conn_recv_ancillary.crb_ipv6_recvtclass &&
379 	    ipp->ipp_tclass != sctp->sctp_recvtclass) {
380 		optlen += sizeof (struct T_opthdr) + sizeof (uint_t);
381 		if (hdrlen == 0)
382 			hdrlen = sizeof (struct T_unitdata_ind);
383 		addflag.crb_ipv6_recvtclass = 1;
384 	}
385 	/* If app asked for hopbyhop headers and it has changed ... */
386 	if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts &&
387 	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
388 	    (ipp->ipp_fields & IPPF_HOPOPTS),
389 	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
390 		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
391 		    sctp->sctp_v6label_len;
392 		if (hdrlen == 0)
393 			hdrlen = sizeof (struct T_unitdata_ind);
394 		addflag.crb_ipv6_recvhopopts = 1;
395 		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
396 		    &sctp->sctp_hopoptslen,
397 		    (ipp->ipp_fields & IPPF_HOPOPTS),
398 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
399 			return (-1);
400 	}
401 	/* If app asked for dst headers before routing headers ... */
402 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts &&
403 	    ip_cmpbuf(sctp->sctp_rthdrdstopts, sctp->sctp_rthdrdstoptslen,
404 	    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
405 	    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) {
406 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
407 		if (hdrlen == 0)
408 			hdrlen = sizeof (struct T_unitdata_ind);
409 		addflag.crb_ipv6_recvrthdrdstopts = 1;
410 		if (!ip_allocbuf((void **)&sctp->sctp_rthdrdstopts,
411 		    &sctp->sctp_rthdrdstoptslen,
412 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
413 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen))
414 			return (-1);
415 	}
416 	/* If app asked for routing headers and it has changed ... */
417 	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr &&
418 	    ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
419 	    (ipp->ipp_fields & IPPF_RTHDR),
420 	    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
421 		optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
422 		if (hdrlen == 0)
423 			hdrlen = sizeof (struct T_unitdata_ind);
424 		addflag.crb_ipv6_recvrthdr = 1;
425 		if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
426 		    &sctp->sctp_rthdrlen,
427 		    (ipp->ipp_fields & IPPF_RTHDR),
428 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
429 			return (-1);
430 	}
431 	/* If app asked for dest headers and it has changed ... */
432 	if (connp->conn_recv_ancillary.crb_ipv6_recvdstopts &&
433 	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
434 	    (ipp->ipp_fields & IPPF_DSTOPTS),
435 	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
436 		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
437 		if (hdrlen == 0)
438 			hdrlen = sizeof (struct T_unitdata_ind);
439 		addflag.crb_ipv6_recvdstopts = 1;
440 		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
441 		    &sctp->sctp_dstoptslen,
442 		    (ipp->ipp_fields & IPPF_DSTOPTS),
443 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
444 			return (-1);
445 	}
446 noancillary:
447 	/* Nothing to add */
448 	if (hdrlen == 0)
449 		return (-1);
450 
451 	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
452 	if (mp1 == NULL)
453 		return (-1);
454 	mp1->b_cont = *mp;
455 	*mp = mp1;
456 	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
457 	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
458 	DB_TYPE(mp1) = M_PROTO;
459 	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
460 	tudi->PRIM_type = T_UNITDATA_IND;
461 	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
462 	tudi->SRC_offset = sizeof (*tudi);
463 	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
464 	tudi->OPT_length = optlen;
465 	if (sin4) {
466 		bcopy(sin4, tudi + 1, sizeof (*sin4));
467 	} else {
468 		bcopy(sin6, tudi + 1, sizeof (*sin6));
469 	}
470 	optptr = (uchar_t *)tudi + tudi->OPT_offset;
471 
472 	if (sctp->sctp_recvsndrcvinfo) {
473 		/* XXX need backout method if memory allocation fails. */
474 		struct sctp_sndrcvinfo *sri;
475 
476 		cmsg = (struct cmsghdr *)optptr;
477 		cmsg->cmsg_level = IPPROTO_SCTP;
478 		cmsg->cmsg_type = SCTP_SNDRCV;
479 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
480 		optptr += sizeof (*cmsg);
481 
482 		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
483 		ASSERT(OK_32PTR(sri));
484 		sri->sinfo_stream = ntohs(dcp->sdh_sid);
485 		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
486 		if (SCTP_DATA_GET_UBIT(dcp)) {
487 			sri->sinfo_flags = MSG_UNORDERED;
488 		} else {
489 			sri->sinfo_flags = 0;
490 		}
491 		sri->sinfo_ppid = dcp->sdh_payload_id;
492 		sri->sinfo_context = 0;
493 		sri->sinfo_timetolive = 0;
494 		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
495 		sri->sinfo_cumtsn = sctp->sctp_ftsn;
496 		sri->sinfo_assoc_id = 0;
497 
498 		optptr += sizeof (*sri);
499 	}
500 
501 	/*
502 	 * If app asked for pktinfo and the index has changed ...
503 	 * Note that the local address never changes for the connection.
504 	 */
505 	if (addflag.crb_ip_recvpktinfo) {
506 		struct in6_pktinfo *pkti;
507 		uint_t ifindex;
508 
509 		ifindex = ira->ira_ruifindex;
510 		cmsg = (struct cmsghdr *)optptr;
511 		cmsg->cmsg_level = IPPROTO_IPV6;
512 		cmsg->cmsg_type = IPV6_PKTINFO;
513 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
514 		optptr += sizeof (*cmsg);
515 
516 		pkti = (struct in6_pktinfo *)optptr;
517 		if (connp->conn_family == AF_INET6)
518 			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
519 		else
520 			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
521 			    &pkti->ipi6_addr);
522 
523 		pkti->ipi6_ifindex = ifindex;
524 		optptr += sizeof (*pkti);
525 		ASSERT(OK_32PTR(optptr));
526 		/* Save as "last" value */
527 		sctp->sctp_recvifindex = ifindex;
528 	}
529 	/* If app asked for hoplimit and it has changed ... */
530 	if (addflag.crb_ipv6_recvhoplimit) {
531 		cmsg = (struct cmsghdr *)optptr;
532 		cmsg->cmsg_level = IPPROTO_IPV6;
533 		cmsg->cmsg_type = IPV6_HOPLIMIT;
534 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
535 		optptr += sizeof (*cmsg);
536 
537 		*(uint_t *)optptr = ipp->ipp_hoplimit;
538 		optptr += sizeof (uint_t);
539 		ASSERT(OK_32PTR(optptr));
540 		/* Save as "last" value */
541 		sctp->sctp_recvhops = ipp->ipp_hoplimit;
542 	}
543 	/* If app asked for tclass and it has changed ... */
544 	if (addflag.crb_ipv6_recvtclass) {
545 		cmsg = (struct cmsghdr *)optptr;
546 		cmsg->cmsg_level = IPPROTO_IPV6;
547 		cmsg->cmsg_type = IPV6_TCLASS;
548 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
549 		optptr += sizeof (*cmsg);
550 
551 		*(uint_t *)optptr = ipp->ipp_tclass;
552 		optptr += sizeof (uint_t);
553 		ASSERT(OK_32PTR(optptr));
554 		/* Save as "last" value */
555 		sctp->sctp_recvtclass = ipp->ipp_tclass;
556 	}
557 	if (addflag.crb_ipv6_recvhopopts) {
558 		cmsg = (struct cmsghdr *)optptr;
559 		cmsg->cmsg_level = IPPROTO_IPV6;
560 		cmsg->cmsg_type = IPV6_HOPOPTS;
561 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_hopoptslen;
562 		optptr += sizeof (*cmsg);
563 
564 		bcopy(ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen);
565 		optptr += ipp->ipp_hopoptslen;
566 		ASSERT(OK_32PTR(optptr));
567 		/* Save as last value */
568 		ip_savebuf((void **)&sctp->sctp_hopopts,
569 		    &sctp->sctp_hopoptslen,
570 		    (ipp->ipp_fields & IPPF_HOPOPTS),
571 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
572 	}
573 	if (addflag.crb_ipv6_recvrthdrdstopts) {
574 		cmsg = (struct cmsghdr *)optptr;
575 		cmsg->cmsg_level = IPPROTO_IPV6;
576 		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
577 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
578 		optptr += sizeof (*cmsg);
579 
580 		bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen);
581 		optptr += ipp->ipp_rthdrdstoptslen;
582 		ASSERT(OK_32PTR(optptr));
583 		/* Save as last value */
584 		ip_savebuf((void **)&sctp->sctp_rthdrdstopts,
585 		    &sctp->sctp_rthdrdstoptslen,
586 		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
587 		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen);
588 	}
589 	if (addflag.crb_ipv6_recvrthdr) {
590 		cmsg = (struct cmsghdr *)optptr;
591 		cmsg->cmsg_level = IPPROTO_IPV6;
592 		cmsg->cmsg_type = IPV6_RTHDR;
593 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
594 		optptr += sizeof (*cmsg);
595 
596 		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
597 		optptr += ipp->ipp_rthdrlen;
598 		ASSERT(OK_32PTR(optptr));
599 		/* Save as last value */
600 		ip_savebuf((void **)&sctp->sctp_rthdr,
601 		    &sctp->sctp_rthdrlen,
602 		    (ipp->ipp_fields & IPPF_RTHDR),
603 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
604 	}
605 	if (addflag.crb_ipv6_recvdstopts) {
606 		cmsg = (struct cmsghdr *)optptr;
607 		cmsg->cmsg_level = IPPROTO_IPV6;
608 		cmsg->cmsg_type = IPV6_DSTOPTS;
609 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
610 		optptr += sizeof (*cmsg);
611 
612 		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
613 		optptr += ipp->ipp_dstoptslen;
614 		ASSERT(OK_32PTR(optptr));
615 		/* Save as last value */
616 		ip_savebuf((void **)&sctp->sctp_dstopts,
617 		    &sctp->sctp_dstoptslen,
618 		    (ipp->ipp_fields & IPPF_DSTOPTS),
619 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
620 	}
621 
622 	ASSERT(optptr == mp1->b_wptr);
623 
624 	return (0);
625 }
626 
627 void
sctp_free_reass(sctp_instr_t * sip)628 sctp_free_reass(sctp_instr_t *sip)
629 {
630 	mblk_t *mp, *mpnext, *mctl;
631 #ifdef	DEBUG
632 	sctp_reass_t	*srp;
633 #endif
634 
635 	for (mp = sip->istr_reass; mp != NULL; mp = mpnext) {
636 		mpnext = mp->b_next;
637 		mp->b_next = NULL;
638 		mp->b_prev = NULL;
639 		if (DB_TYPE(mp) == M_CTL) {
640 			mctl = mp;
641 #ifdef	DEBUG
642 			srp = (sctp_reass_t *)DB_BASE(mctl);
643 			/* Partial delivery can leave empty srp */
644 			ASSERT(mp->b_cont != NULL || srp->sr_got == 0);
645 #endif
646 			mp = mp->b_cont;
647 			mctl->b_cont = NULL;
648 			freeb(mctl);
649 		}
650 		freemsg(mp);
651 	}
652 	sip->istr_reass = NULL;
653 }
654 
655 /*
656  * If the series of data fragments of which dmp is a part is successfully
657  * reassembled, the first mblk in the series is returned. dc is adjusted
658  * to point at the data chunk in the lead mblk, and b_rptr also points to
659  * the data chunk; the following mblk's b_rptr's point at the actual payload.
660  *
661  * If the series is not yet reassembled, NULL is returned. dc is not changed.
662  * XXX should probably move this up into the state machine.
663  */
664 
665 /* Fragment list for un-ordered messages. Partial delivery is not supported */
666 static mblk_t *
sctp_uodata_frag(sctp_t * sctp,mblk_t * dmp,sctp_data_hdr_t ** dc)667 sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
668 {
669 	mblk_t		*hmp;
670 	mblk_t		*begin = NULL;
671 	mblk_t		*end = NULL;
672 	sctp_data_hdr_t	*qdc;
673 	uint32_t	ntsn;
674 	uint32_t	tsn = ntohl((*dc)->sdh_tsn);
675 #ifdef	DEBUG
676 	mblk_t		*mp1;
677 #endif
678 
679 	/* First frag. */
680 	if (sctp->sctp_uo_frags == NULL) {
681 		sctp->sctp_uo_frags = dmp;
682 		return (NULL);
683 	}
684 	hmp = sctp->sctp_uo_frags;
685 	/*
686 	 * Insert the segment according to the TSN, fragmented unordered
687 	 * chunks are sequenced by TSN.
688 	 */
689 	while (hmp != NULL) {
690 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
691 		ntsn = ntohl(qdc->sdh_tsn);
692 		if (SEQ_GT(ntsn, tsn)) {
693 			if (hmp->b_prev == NULL) {
694 				dmp->b_next = hmp;
695 				hmp->b_prev = dmp;
696 				sctp->sctp_uo_frags = dmp;
697 			} else {
698 				dmp->b_next = hmp;
699 				dmp->b_prev = hmp->b_prev;
700 				hmp->b_prev->b_next = dmp;
701 				hmp->b_prev = dmp;
702 			}
703 			break;
704 		}
705 		if (hmp->b_next == NULL) {
706 			hmp->b_next = dmp;
707 			dmp->b_prev = hmp;
708 			break;
709 		}
710 		hmp = hmp->b_next;
711 	}
712 	/* check if we completed a msg */
713 	if (SCTP_DATA_GET_BBIT(*dc)) {
714 		begin = dmp;
715 	} else if (SCTP_DATA_GET_EBIT(*dc)) {
716 		end = dmp;
717 	}
718 	/*
719 	 * We walk consecutive TSNs backwards till we get a seg. with
720 	 * the B bit
721 	 */
722 	if (begin == NULL) {
723 		for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
724 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
725 			ntsn = ntohl(qdc->sdh_tsn);
726 			if ((int32_t)(tsn - ntsn) > 1) {
727 				return (NULL);
728 			}
729 			if (SCTP_DATA_GET_BBIT(qdc)) {
730 				begin = hmp;
731 				break;
732 			}
733 			tsn = ntsn;
734 		}
735 	}
736 	tsn = ntohl((*dc)->sdh_tsn);
737 	/*
738 	 * We walk consecutive TSNs till we get a seg. with the E bit
739 	 */
740 	if (end == NULL) {
741 		for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
742 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
743 			ntsn = ntohl(qdc->sdh_tsn);
744 			if ((int32_t)(ntsn - tsn) > 1) {
745 				return (NULL);
746 			}
747 			if (SCTP_DATA_GET_EBIT(qdc)) {
748 				end = hmp;
749 				break;
750 			}
751 			tsn = ntsn;
752 		}
753 	}
754 	if (begin == NULL || end == NULL) {
755 		return (NULL);
756 	}
757 	/* Got one!, Remove the msg from the list */
758 	if (sctp->sctp_uo_frags == begin) {
759 		ASSERT(begin->b_prev == NULL);
760 		sctp->sctp_uo_frags = end->b_next;
761 		if (end->b_next != NULL)
762 			end->b_next->b_prev = NULL;
763 	} else {
764 		begin->b_prev->b_next = end->b_next;
765 		if (end->b_next != NULL)
766 			end->b_next->b_prev = begin->b_prev;
767 	}
768 	begin->b_prev = NULL;
769 	end->b_next = NULL;
770 
771 	/*
772 	 * Null out b_next and b_prev and chain using b_cont.
773 	 */
774 	dmp = end = begin;
775 	hmp = begin->b_next;
776 	*dc = (sctp_data_hdr_t *)begin->b_rptr;
777 	begin->b_next = NULL;
778 	while (hmp != NULL) {
779 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
780 		hmp->b_rptr = (uchar_t *)(qdc + 1);
781 		end = hmp->b_next;
782 		dmp->b_cont = hmp;
783 		dmp = hmp;
784 
785 		if (end != NULL)
786 			hmp->b_next = NULL;
787 		hmp->b_prev = NULL;
788 		hmp = end;
789 	}
790 	BUMP_LOCAL(sctp->sctp_reassmsgs);
791 #ifdef	DEBUG
792 	mp1 = begin;
793 	while (mp1 != NULL) {
794 		ASSERT(mp1->b_next == NULL);
795 		ASSERT(mp1->b_prev == NULL);
796 		mp1 = mp1->b_cont;
797 	}
798 #endif
799 	return (begin);
800 }
801 
802 /*
803  * Try partial delivery.
804  */
805 static mblk_t *
sctp_try_partial_delivery(sctp_t * sctp,mblk_t * hmp,sctp_reass_t * srp,sctp_data_hdr_t ** dc)806 sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
807     sctp_data_hdr_t **dc)
808 {
809 	mblk_t		*mp;
810 	mblk_t		*dmp;
811 	mblk_t		*qmp;
812 	mblk_t		*prev;
813 	sctp_data_hdr_t	*qdc;
814 	uint32_t	tsn;
815 
816 	ASSERT(DB_TYPE(hmp) == M_CTL);
817 
818 	dprint(4, ("trypartial: got=%d, needed=%d\n",
819 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
820 
821 	mp = hmp->b_cont;
822 	qdc = (sctp_data_hdr_t *)mp->b_rptr;
823 
824 	ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->sr_hasBchunk);
825 
826 	tsn = ntohl(qdc->sdh_tsn) + 1;
827 
828 	/*
829 	 * This loop has two exit conditions: the
830 	 * end of received chunks has been reached, or
831 	 * there is a break in the sequence. We want
832 	 * to chop the reassembly list as follows (the
833 	 * numbers are TSNs):
834 	 *   10 -> 11 ->	(end of chunks)
835 	 *   10 -> 11 -> | 13   (break in sequence)
836 	 */
837 	prev = mp;
838 	mp = mp->b_cont;
839 	while (mp != NULL) {
840 		qdc = (sctp_data_hdr_t *)mp->b_rptr;
841 		if (ntohl(qdc->sdh_tsn) != tsn)
842 			break;
843 		prev = mp;
844 		mp = mp->b_cont;
845 		tsn++;
846 	}
847 	/*
848 	 * We are sending all the fragments upstream, we have to retain
849 	 * the srp info for further fragments.
850 	 */
851 	if (mp == NULL) {
852 		dmp = hmp->b_cont;
853 		hmp->b_cont = NULL;
854 		srp->sr_nexttsn = tsn;
855 		srp->sr_msglen = 0;
856 		srp->sr_needed = 0;
857 		srp->sr_got = 0;
858 		srp->sr_tail = NULL;
859 	} else {
860 		/*
861 		 * There is a gap then some ordered frags which are not
862 		 * the next deliverable tsn. When the next deliverable
863 		 * frag arrives it will be set as the new list head in
864 		 * sctp_data_frag() by setting the B bit.
865 		 */
866 		dmp = hmp->b_cont;
867 		hmp->b_cont = mp;
868 	}
869 	srp->sr_hasBchunk = B_FALSE;
870 	/*
871 	 * mp now points at the last chunk in the sequence,
872 	 * and prev points to mp's previous in the list.
873 	 * We chop the list at prev. Subsequent fragment
874 	 * deliveries will follow the normal reassembly
875 	 * path unless they too exceed the sctp_pd_point.
876 	 */
877 	prev->b_cont = NULL;
878 	srp->sr_partial_delivered = B_TRUE;
879 
880 	dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
881 	    (int)(srp->sr_got), (int)(srp->sr_needed)));
882 
883 	/*
884 	 * Adjust all mblk's except the lead so their rptr's point to the
885 	 * payload. sctp_data_chunk() will need to process the lead's
886 	 * data chunk section, so leave it's rptr pointing at the data chunk.
887 	 */
888 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
889 	if (srp->sr_tail != NULL) {
890 		srp->sr_got--;
891 		ASSERT(srp->sr_got != 0);
892 		if (srp->sr_needed != 0) {
893 			srp->sr_needed--;
894 			ASSERT(srp->sr_needed != 0);
895 		}
896 		srp->sr_msglen -= ntohs((*dc)->sdh_len);
897 	}
898 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
899 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
900 		qmp->b_rptr = (uchar_t *)(qdc + 1);
901 
902 		/*
903 		 * Deduct the balance from got and needed here, now that
904 		 * we know we are actually delivering these data.
905 		 */
906 		if (srp->sr_tail != NULL) {
907 			srp->sr_got--;
908 			ASSERT(srp->sr_got != 0);
909 			if (srp->sr_needed != 0) {
910 				srp->sr_needed--;
911 				ASSERT(srp->sr_needed != 0);
912 			}
913 			srp->sr_msglen -= ntohs(qdc->sdh_len);
914 		}
915 	}
916 	ASSERT(srp->sr_msglen == 0);
917 	BUMP_LOCAL(sctp->sctp_reassmsgs);
918 
919 	return (dmp);
920 }
921 
922 /*
923  * Handle received fragments for ordered delivery to upper layer protocol.
924  * Manage the per message reassembly queue and if this fragment completes
925  * reassembly of the message, or qualifies the already reassembled data
926  * for partial delivery, prepare the message for delivery upstream.
927  *
928  * tpfinished in the caller remains set only when the incoming fragment
929  * has completed the reassembly of the message associated with its ssn.
930  */
931 static mblk_t *
sctp_data_frag(sctp_t * sctp,mblk_t * dmp,sctp_data_hdr_t ** dc,int * error,sctp_instr_t * sip,boolean_t * tpfinished)932 sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
933     sctp_instr_t *sip, boolean_t *tpfinished)
934 {
935 	mblk_t		*reassq_curr, *reassq_next, *reassq_prev;
936 	mblk_t		*new_reassq;
937 	mblk_t		*qmp;
938 	mblk_t		*first_mp;
939 	sctp_reass_t	*srp;
940 	sctp_data_hdr_t	*qdc;
941 	sctp_data_hdr_t	*bdc;
942 	sctp_data_hdr_t	*edc;
943 	uint32_t	tsn;
944 	uint16_t	fraglen = 0;
945 
946 	reassq_curr = NULL;
947 	*error = 0;
948 
949 	/*
950 	 * Find the reassembly queue for this data chunk, if none
951 	 * yet exists, a new per message queue will be created and
952 	 * appended to the end of the list of per message queues.
953 	 *
954 	 * sip points on sctp_instr_t representing instream messages
955 	 * as yet undelivered for this stream (sid) of the association.
956 	 */
957 	reassq_next = reassq_prev = sip->istr_reass;
958 	for (; reassq_next != NULL; reassq_next = reassq_next->b_next) {
959 		srp = (sctp_reass_t *)DB_BASE(reassq_next);
960 		if (ntohs((*dc)->sdh_ssn) == srp->sr_ssn) {
961 			reassq_curr = reassq_next;
962 			goto foundit;
963 		} else if (SSN_GT(srp->sr_ssn, ntohs((*dc)->sdh_ssn)))
964 			break;
965 		reassq_prev = reassq_next;
966 	}
967 
968 	/*
969 	 * First fragment of this message received, allocate a M_CTL that
970 	 * will head the reassembly queue for this message. The message
971 	 * and all its fragments are identified by having the same ssn.
972 	 *
973 	 * Arriving fragments will be inserted in tsn order on the
974 	 * reassembly queue for this message (ssn), linked by b_cont.
975 	 */
976 	if ((new_reassq = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
977 		*error = ENOMEM;
978 		return (NULL);
979 	}
980 	DB_TYPE(new_reassq) = M_CTL;
981 	srp = (sctp_reass_t *)DB_BASE(new_reassq);
982 	new_reassq->b_cont = dmp;
983 
984 	/*
985 	 * All per ssn reassembly queues, (one for each message) on
986 	 * this stream are doubly linked by b_next/b_prev back to the
987 	 * instr_reass of the instream structure associated with this
988 	 * stream id, (sip is initialized as sctp->sctp_instr[sid]).
989 	 * Insert the new reassembly queue in the correct (ssn) order.
990 	 */
991 	if (reassq_next != NULL) {
992 		if (sip->istr_reass == reassq_next) {
993 			/* head insertion */
994 			sip->istr_reass = new_reassq;
995 			new_reassq->b_next = reassq_next;
996 			new_reassq->b_prev = NULL;
997 			reassq_next->b_prev = new_reassq;
998 		} else {
999 			/* mid queue insertion */
1000 			reassq_prev->b_next = new_reassq;
1001 			new_reassq->b_prev = reassq_prev;
1002 			new_reassq->b_next = reassq_next;
1003 			reassq_next->b_prev = new_reassq;
1004 		}
1005 	} else {
1006 		/* place new reassembly queue at the end */
1007 		if (sip->istr_reass == NULL) {
1008 			sip->istr_reass = new_reassq;
1009 			new_reassq->b_prev = NULL;
1010 		} else {
1011 			reassq_prev->b_next = new_reassq;
1012 			new_reassq->b_prev = reassq_prev;
1013 		}
1014 		new_reassq->b_next = NULL;
1015 	}
1016 	srp->sr_partial_delivered = B_FALSE;
1017 	srp->sr_ssn = ntohs((*dc)->sdh_ssn);
1018 	srp->sr_hasBchunk = B_FALSE;
1019 empty_srp:
1020 	srp->sr_needed = 0;
1021 	srp->sr_got = 1;
1022 	/* tail always the highest tsn on the reassembly queue for this ssn */
1023 	srp->sr_tail = dmp;
1024 	if (SCTP_DATA_GET_BBIT(*dc)) {
1025 		/* Incoming frag is flagged as the beginning of message */
1026 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1027 		srp->sr_nexttsn = ntohl((*dc)->sdh_tsn) + 1;
1028 		srp->sr_hasBchunk = B_TRUE;
1029 	} else if (srp->sr_partial_delivered &&
1030 	    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1031 		/*
1032 		 * The real beginning fragment of the message was already
1033 		 * delivered upward, so this is the earliest frag expected.
1034 		 * Fake the B-bit then see if this frag also completes the
1035 		 * message.
1036 		 */
1037 		SCTP_DATA_SET_BBIT(*dc);
1038 		srp->sr_hasBchunk = B_TRUE;
1039 		srp->sr_msglen = ntohs((*dc)->sdh_len);
1040 		if (SCTP_DATA_GET_EBIT(*dc)) {
1041 			/* This frag is marked as the end of message */
1042 			srp->sr_needed = 1;
1043 			/* Got all fragments of this message now */
1044 			goto frag_done;
1045 		}
1046 		srp->sr_nexttsn++;
1047 	}
1048 
1049 	/* The only fragment of this message currently queued */
1050 	*tpfinished = B_FALSE;
1051 	return (NULL);
1052 foundit:
1053 	/*
1054 	 * This message already has a reassembly queue. Insert the new frag
1055 	 * in the reassembly queue. Try the tail first, on the assumption
1056 	 * that the fragments are arriving in order.
1057 	 */
1058 	qmp = srp->sr_tail;
1059 
1060 	/*
1061 	 * A NULL tail means all existing fragments of the message have
1062 	 * been entirely consumed during a partially delivery.
1063 	 */
1064 	if (qmp == NULL) {
1065 		ASSERT(srp->sr_got == 0 && srp->sr_needed == 0 &&
1066 		    srp->sr_partial_delivered);
1067 		ASSERT(reassq_curr->b_cont == NULL);
1068 		reassq_curr->b_cont = dmp;
1069 		goto empty_srp;
1070 	} else {
1071 		/*
1072 		 * If partial delivery did take place but the next arriving
1073 		 * fragment was not the next to be delivered, or partial
1074 		 * delivery broke off due to a gap, fragments remain on the
1075 		 * tail. The next fragment due to be delivered still has to
1076 		 * be set as the new head of list upon arrival. Fake B-bit
1077 		 * on that frag then see if it also completes the message.
1078 		 */
1079 		if (srp->sr_partial_delivered &&
1080 		    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1081 			SCTP_DATA_SET_BBIT(*dc);
1082 			srp->sr_hasBchunk = B_TRUE;
1083 			if (SCTP_DATA_GET_EBIT(*dc)) {
1084 				/* Got all fragments of this message now */
1085 				goto frag_done;
1086 			}
1087 		}
1088 	}
1089 
1090 	/* grab the frag header of already queued tail frag for comparison */
1091 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1092 	ASSERT(qmp->b_cont == NULL);
1093 
1094 	/* check if the frag goes on the tail in order */
1095 	if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1096 		qmp->b_cont = dmp;
1097 		srp->sr_tail = dmp;
1098 		dmp->b_cont = NULL;
1099 		if (srp->sr_hasBchunk && srp->sr_nexttsn ==
1100 		    ntohl((*dc)->sdh_tsn)) {
1101 			srp->sr_msglen += ntohs((*dc)->sdh_len);
1102 			srp->sr_nexttsn++;
1103 		}
1104 		goto inserted;
1105 	}
1106 
1107 	/* Next check if we should insert this frag at the beginning */
1108 	qmp = reassq_curr->b_cont;
1109 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1110 	if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1111 		dmp->b_cont = qmp;
1112 		reassq_curr->b_cont = dmp;
1113 		if (SCTP_DATA_GET_BBIT(*dc)) {
1114 			srp->sr_hasBchunk = B_TRUE;
1115 			srp->sr_nexttsn = ntohl((*dc)->sdh_tsn);
1116 		}
1117 		goto preinserted;
1118 	}
1119 
1120 	/* Insert this frag in it's correct order in the middle */
1121 	for (;;) {
1122 		/* Tail check above should have caught this */
1123 		ASSERT(qmp->b_cont != NULL);
1124 
1125 		qdc = (sctp_data_hdr_t *)qmp->b_cont->b_rptr;
1126 		if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1127 			/* insert here */
1128 			dmp->b_cont = qmp->b_cont;
1129 			qmp->b_cont = dmp;
1130 			break;
1131 		}
1132 		qmp = qmp->b_cont;
1133 	}
1134 preinserted:
1135 	/*
1136 	 * Need head of message and to be due to deliver, otherwise skip
1137 	 * the recalculation of the message length below.
1138 	 */
1139 	if (!srp->sr_hasBchunk || ntohl((*dc)->sdh_tsn) != srp->sr_nexttsn)
1140 		goto inserted;
1141 	/*
1142 	 * fraglen contains the length of consecutive chunks of fragments.
1143 	 * starting from the chunk we just inserted.
1144 	 */
1145 	tsn = srp->sr_nexttsn;
1146 	for (qmp = dmp; qmp != NULL; qmp = qmp->b_cont) {
1147 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1148 		if (tsn != ntohl(qdc->sdh_tsn))
1149 			break;
1150 		fraglen += ntohs(qdc->sdh_len);
1151 		tsn++;
1152 	}
1153 	srp->sr_nexttsn = tsn;
1154 	srp->sr_msglen += fraglen;
1155 inserted:
1156 	srp->sr_got++;
1157 	first_mp = reassq_curr->b_cont;
1158 	/* Prior to this frag either the beginning or end frag was missing */
1159 	if (srp->sr_needed == 0) {
1160 		/* used to check if we have the first and last fragments */
1161 		bdc = (sctp_data_hdr_t *)first_mp->b_rptr;
1162 		edc = (sctp_data_hdr_t *)srp->sr_tail->b_rptr;
1163 
1164 		/*
1165 		 * If we now have both the beginning and the end of the message,
1166 		 * calculate how many fragments in the complete message.
1167 		 */
1168 		if (SCTP_DATA_GET_BBIT(bdc) && SCTP_DATA_GET_EBIT(edc)) {
1169 			srp->sr_needed = ntohl(edc->sdh_tsn) -
1170 			    ntohl(bdc->sdh_tsn) + 1;
1171 		}
1172 	}
1173 
1174 	/*
1175 	 * Try partial delivery if the message length has exceeded the
1176 	 * partial delivery point. Only do this if we can immediately
1177 	 * deliver the partially assembled message, and only partially
1178 	 * deliver one message at a time (i.e. messages cannot be
1179 	 * intermixed arriving at the upper layer).
1180 	 * sctp_try_partial_delivery() will return a message consisting
1181 	 * of only consecutive fragments.
1182 	 */
1183 	if (srp->sr_needed != srp->sr_got) {
1184 		/* we don't have the full message yet */
1185 		dmp = NULL;
1186 		if (ntohl((*dc)->sdh_tsn) <= sctp->sctp_ftsn &&
1187 		    srp->sr_msglen >= sctp->sctp_pd_point &&
1188 		    srp->sr_ssn == sip->nextseq) {
1189 			dmp = sctp_try_partial_delivery(sctp, reassq_curr,
1190 			    srp, dc);
1191 		}
1192 		*tpfinished = B_FALSE;
1193 		/*
1194 		 * NULL unless a segment of the message now qualified for
1195 		 * partial_delivery and has been prepared for delivery by
1196 		 * sctp_try_partial_delivery().
1197 		 */
1198 		return (dmp);
1199 	}
1200 frag_done:
1201 	/*
1202 	 * Reassembly complete for this message, prepare the data for delivery.
1203 	 * First unlink the reassembly queue for this ssn from the list of
1204 	 * messages in reassembly.
1205 	 */
1206 	if (sip->istr_reass == reassq_curr) {
1207 		sip->istr_reass = reassq_curr->b_next;
1208 		if (reassq_curr->b_next)
1209 			reassq_curr->b_next->b_prev = NULL;
1210 	} else {
1211 		ASSERT(reassq_curr->b_prev != NULL);
1212 		reassq_curr->b_prev->b_next = reassq_curr->b_next;
1213 		if (reassq_curr->b_next)
1214 			reassq_curr->b_next->b_prev = reassq_curr->b_prev;
1215 	}
1216 
1217 	/*
1218 	 * Need to clean up b_prev and b_next as freeb() will
1219 	 * ASSERT that they are unused.
1220 	 */
1221 	reassq_curr->b_next = NULL;
1222 	reassq_curr->b_prev = NULL;
1223 
1224 	dmp = reassq_curr;
1225 	/* point to the head of the reassembled data message */
1226 	dmp = dmp->b_cont;
1227 	reassq_curr->b_cont = NULL;
1228 	freeb(reassq_curr);
1229 	/* Tell our caller that we are returning a complete message. */
1230 	*tpfinished = B_TRUE;
1231 
1232 	/*
1233 	 * Adjust all mblk's except the lead so their rptr's point to the
1234 	 * payload. sctp_data_chunk() will need to process the lead's data
1235 	 * data chunk section, so leave its rptr pointing at the data chunk
1236 	 * header.
1237 	 */
1238 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
1239 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
1240 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1241 		qmp->b_rptr = (uchar_t *)(qdc + 1);
1242 	}
1243 	BUMP_LOCAL(sctp->sctp_reassmsgs);
1244 
1245 	return (dmp);
1246 }
1247 
1248 static void
sctp_add_dup(uint32_t tsn,mblk_t ** dups)1249 sctp_add_dup(uint32_t tsn, mblk_t **dups)
1250 {
1251 	mblk_t *mp;
1252 	size_t bsize = SCTP_DUP_MBLK_SZ * sizeof (tsn);
1253 
1254 	if (dups == NULL) {
1255 		return;
1256 	}
1257 
1258 	/* first time? */
1259 	if (*dups == NULL) {
1260 		*dups = allocb(bsize, BPRI_MED);
1261 		if (*dups == NULL) {
1262 			return;
1263 		}
1264 	}
1265 
1266 	mp = *dups;
1267 	if ((mp->b_wptr - mp->b_rptr) >= bsize) {
1268 		/* maximum reached */
1269 		return;
1270 	}
1271 
1272 	/* add the duplicate tsn */
1273 	bcopy(&tsn, mp->b_wptr, sizeof (tsn));
1274 	mp->b_wptr += sizeof (tsn);
1275 	ASSERT((mp->b_wptr - mp->b_rptr) <= bsize);
1276 }
1277 
1278 /*
1279  * All incoming sctp data, complete messages and fragments are handled by
1280  * this function. Unless the U-bit is set in the data chunk it will be
1281  * delivered in order or queued until an in-order delivery can be made.
1282  */
/*
 * ch points at the DATA chunk header inside the packet mblk mp (both
 * reference the same underlying buffer; see the b_wptr adjustment after
 * dupb() below).  dups, when non-NULL, accumulates duplicate TSNs for
 * reporting in the next SACK.  fp is the peer address the chunk arrived
 * from (passed through to error and ancillary-data handling); ipp and
 * ira carry per-packet IP information for ancillary data at delivery.
 */
static void
sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
    sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
{
	sctp_data_hdr_t *dc;
	mblk_t *dmp, *pmp;
	sctp_instr_t *instr;
	int ubit;
	int sid;
	int isfrag;
	uint16_t ssn;
	uint32_t oftsn;
	boolean_t can_deliver = B_TRUE;
	uint32_t tsn;
	int dlen;
	/* assume a complete message until reassembly says otherwise */
	boolean_t tpfinished = B_TRUE;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	int	error;

	/* The following are used multiple times, so we inline them */
#define	SCTP_ACK_IT(sctp, tsn)						\
	if (tsn == sctp->sctp_ftsn) {					\
		dprint(2, ("data_chunk: acking next %x\n", tsn));	\
		(sctp)->sctp_ftsn++;					\
		if ((sctp)->sctp_sack_gaps > 0)				\
			(sctp)->sctp_force_sack = 1;			\
	} else if (SEQ_GT(tsn, sctp->sctp_ftsn)) {			\
		/* Got a gap; record it */				\
		BUMP_LOCAL(sctp->sctp_outseqtsns);			\
		dprint(2, ("data_chunk: acking gap %x\n", tsn));	\
		sctp_ack_add(&sctp->sctp_sack_info, tsn,		\
		    &sctp->sctp_sack_gaps);				\
		sctp->sctp_force_sack = 1;				\
	}

	dmp = NULL;

	dc = (sctp_data_hdr_t *)ch;
	tsn = ntohl(dc->sdh_tsn);

	dprint(3, ("sctp_data_chunk: mp=%p tsn=%x\n", (void *)mp, tsn));

	/* Check for duplicates (below the cumulative TSN already acked) */
	if (SEQ_LT(tsn, sctp->sctp_ftsn)) {
		dprint(4, ("sctp_data_chunk: dropping duplicate\n"));
		BUMP_LOCAL(sctp->sctp_idupchunks);
		sctp->sctp_force_sack = 1;
		/* sdh_tsn passed in network byte order for the report */
		sctp_add_dup(dc->sdh_tsn, dups);
		return;
	}

	/* Check for dups of sack'ed data (TSNs inside a recorded gap) */
	if (sctp->sctp_sack_info != NULL) {
		sctp_set_t *sp;

		for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
			if (SEQ_GEQ(tsn, sp->begin) && SEQ_LEQ(tsn, sp->end)) {
				dprint(4,
				    ("sctp_data_chunk: dropping dup > "
				    "cumtsn\n"));
				BUMP_LOCAL(sctp->sctp_idupchunks);
				sctp->sctp_force_sack = 1;
				sctp_add_dup(dc->sdh_tsn, dups);
				return;
			}
		}
	}

	/* We can no longer deliver anything up, but still need to handle it. */
	if (SCTP_IS_DETACHED(sctp)) {
		SCTPS_BUMP_MIB(sctps, sctpInClosed);
		can_deliver = B_FALSE;
	}

	/* payload length: chunk length minus the data chunk header */
	dlen = ntohs(dc->sdh_len) - sizeof (*dc);

	/*
	 * Check for buffer space. Note if this is the next expected TSN
	 * we have to take it to avoid deadlock because we cannot deliver
	 * later queued TSNs and thus clear buffer space without it.
	 * We drop anything that is purely zero window probe data here.
	 */
	if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
	    (tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
		/* Drop and SACK, but don't advance the cumulative TSN. */
		sctp->sctp_force_sack = 1;
		dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
		    "dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
		    sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
		    ntohl(dc->sdh_tsn)));
		return;
	}

	sid = ntohs(dc->sdh_sid);

	/* Data received for a stream not negotiated for this association */
	if (sid >= sctp->sctp_num_istr) {
		sctp_bsc_t	inval_parm;

		/* Will populate the CAUSE block in the ERROR chunk. */
		inval_parm.bsc_sid = dc->sdh_sid;
		/* RESERVED, ignored at the receiving end */
		inval_parm.bsc_pad = 0;

		/* ack and drop it */
		sctp_add_err(sctp, SCTP_ERR_BAD_SID, (void *)&inval_parm,
		    sizeof (sctp_bsc_t), fp);
		SCTP_ACK_IT(sctp, tsn);
		return;
	}

	/* unordered delivery OK for this data if ubit set */
	ubit = SCTP_DATA_GET_UBIT(dc);
	ASSERT(sctp->sctp_instr != NULL);

	/* select per stream structure for this stream from the array */
	instr = &sctp->sctp_instr[sid];
	/* Initialize the stream, if not yet used */
	if (instr->sctp == NULL)
		instr->sctp = sctp;

	/* Begin and End bit set would mean a complete message */
	isfrag = !(SCTP_DATA_GET_BBIT(dc) && SCTP_DATA_GET_EBIT(dc));

	/* The ssn of this sctp message and of any fragments in it */
	ssn = ntohs(dc->sdh_ssn);

	dmp = dupb(mp);
	if (dmp == NULL) {
		/* drop it and don't ack, let the peer retransmit */
		return;
	}
	/*
	 * Past header and payload, note: the underlying buffer may
	 * contain further chunks from the same incoming IP packet,
	 * if so db_ref will be greater than one.
	 */
	dmp->b_wptr = (uchar_t *)ch + ntohs(ch->sch_len);

	sctp->sctp_rxqueued += dlen;

	/* Snapshot the cumulative TSN; "done:" below detects advancement. */
	oftsn = sctp->sctp_ftsn;

	if (isfrag) {

		error = 0;
		/* fragmented data chunk */
		dmp->b_rptr = (uchar_t *)dc;
		if (ubit) {
			/* prepare data for unordered delivery */
			dmp = sctp_uodata_frag(sctp, dmp, &dc);
#if	DEBUG
			if (dmp != NULL) {
				ASSERT(instr ==
				    &sctp->sctp_instr[sid]);
			}
#endif
		} else {
			/*
			 * Assemble fragments and queue for ordered delivery,
			 * dmp returned is NULL or the head of a complete or
			 * "partial delivery" message. Any returned message
			 * and all its fragments will have the same ssn as the
			 * input fragment currently being handled.
			 */
			dmp = sctp_data_frag(sctp, dmp, &dc, &error, instr,
			    &tpfinished);
		}
		if (error == ENOMEM) {
			/* back out the adjustment made earlier */
			sctp->sctp_rxqueued -= dlen;
			/*
			 * Don't ack the segment,
			 * the peer will retransmit.
			 */
			return;
		}

		if (dmp == NULL) {
			/*
			 * The frag has been queued for later in-order delivery,
			 * but the cumulative TSN may need to advance, so also
			 * need to perform the gap ack checks at the done label.
			 */
			SCTP_ACK_IT(sctp, tsn);
			DTRACE_PROBE4(sctp_data_frag_queued, sctp_t *, sctp,
			    int, sid, int, tsn, uint16_t, ssn);
			goto done;
		}
	}

	/*
	 * Unless message is the next for delivery to the ulp, queue complete
	 * message in the correct order for ordered delivery.
	 * Note: tpfinished is true when the incoming chunk contains a complete
	 * message or is the final missing fragment which completed a message.
	 */
	if (!ubit && tpfinished && ssn != instr->nextseq) {
		/* Adjust rptr to point at the data chunk for compares */
		dmp->b_rptr = (uchar_t *)dc;

		dprint(2,
		    ("data_chunk: inserted %x in pq (ssn %d expected %d)\n",
		    ntohl(dc->sdh_tsn), (int)(ssn), (int)(instr->nextseq)));

		if (instr->istr_msgs == NULL) {
			instr->istr_msgs = dmp;
			ASSERT(dmp->b_prev == NULL && dmp->b_next == NULL);
		} else {
			mblk_t			*imblk = instr->istr_msgs;
			sctp_data_hdr_t		*idc;

			/*
			 * XXXNeed to take sequence wraps into account,
			 * ... and a more efficient insertion algo.
			 */
			for (;;) {
				idc = (sctp_data_hdr_t *)imblk->b_rptr;
				if (SSN_GT(ntohs(idc->sdh_ssn),
				    ntohs(dc->sdh_ssn))) {
					/* insert before imblk (head or mid) */
					if (instr->istr_msgs == imblk) {
						instr->istr_msgs = dmp;
						dmp->b_next = imblk;
						imblk->b_prev = dmp;
					} else {
						ASSERT(imblk->b_prev != NULL);
						imblk->b_prev->b_next = dmp;
						dmp->b_prev = imblk->b_prev;
						imblk->b_prev = dmp;
						dmp->b_next = imblk;
					}
					break;
				}
				if (imblk->b_next == NULL) {
					/* highest ssn so far; append at tail */
					imblk->b_next = dmp;
					dmp->b_prev = imblk;
					break;
				}
				imblk = imblk->b_next;
			}
		}
		(instr->istr_nmsgs)++;
		(sctp->sctp_istr_nmsgs)++;
		SCTP_ACK_IT(sctp, tsn);
		DTRACE_PROBE4(sctp_pqueue_completemsg, sctp_t *, sctp,
		    int, sid, int, tsn, uint16_t, ssn);
		return;
	}

	/*
	 * Deliver the data directly. Recalculate dlen now since
	 * we may have just reassembled this data.
	 */
	dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
	for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
		dlen += MBLKL(pmp);
	ASSERT(sctp->sctp_rxqueued >= dlen);

	/* Deliver the message. */
	sctp->sctp_rxqueued -= dlen;

	if (can_deliver) {
		/* step past header to the payload */
		dmp->b_rptr = (uchar_t *)(dc + 1);
		if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
		    ipp, ira) == 0) {
			dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
			    msgdsize(dmp)));
			/*
			 * We overload the meaning of b_flag for SCTP sockfs
			 * internal use, to advise sockfs of partial delivery
			 * semantics.
			 */
			dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
			if (sctp->sctp_flowctrld) {
				sctp->sctp_rwnd -= dlen;
				if (sctp->sctp_rwnd < 0)
					sctp->sctp_rwnd = 0;
			}
			/* non-positive return means the ULP is flow blocked */
			if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
			    msgdsize(dmp), 0, &error, NULL) <= 0) {
				sctp->sctp_flowctrld = B_TRUE;
			}
			SCTP_ACK_IT(sctp, tsn);
		} else {
			/* No memory don't ack, the peer will retransmit. */
			freemsg(dmp);
			return;
		}
	} else {
		/* Closed above, ack to peer and free the data */
		freemsg(dmp);
		SCTP_ACK_IT(sctp, tsn);
	}

	/*
	 * Data now enqueued, may already have been processed and free'd
	 * by the ULP (or we may have just freed it above, if we could not
	 * deliver), so we must not reference it (this is why we saved the
	 * ssn and ubit earlier).
	 */
	if (ubit != 0) {
		BUMP_LOCAL(sctp->sctp_iudchunks);
		goto done;
	}
	BUMP_LOCAL(sctp->sctp_idchunks);

	/*
	 * There was a partial delivery and it has not finished,
	 * don't pull anything from the pqueues or increment the
	 * nextseq. This msg must complete before starting on
	 * the next ssn and the partial message must have the
	 * same ssn as the next expected message..
	 */
	if (!tpfinished) {
		DTRACE_PROBE4(sctp_partial_delivery, sctp_t *, sctp,
		    int, sid, int, tsn, uint16_t, ssn);
		/*
		 * Verify the partial delivery is part of the
		 * message expected for ordered delivery.
		 */
		if (ssn != instr->nextseq) {
			DTRACE_PROBE4(sctp_partial_delivery_error,
			    sctp_t *, sctp, int, sid, int, tsn,
			    uint16_t, ssn);
			cmn_err(CE_WARN, "sctp partial"
			    " delivery error, sctp 0x%p"
			    " sid = 0x%x ssn != nextseq"
			    " tsn 0x%x ftsn 0x%x"
			    " ssn 0x%x nextseq 0x%x",
			    (void *)sctp, sid,
			    tsn, sctp->sctp_ftsn, ssn,
			    instr->nextseq);
		}

		ASSERT(ssn == instr->nextseq);
		goto done;
	}

	if (ssn != instr->nextseq) {
		DTRACE_PROBE4(sctp_inorder_delivery_error,
		    sctp_t *, sctp, int, sid, int, tsn,
		    uint16_t, ssn);
		cmn_err(CE_WARN, "sctp in-order delivery error, sctp 0x%p "
		    "sid = 0x%x ssn != nextseq ssn 0x%x nextseq 0x%x",
		    (void *)sctp, sid, ssn, instr->nextseq);
	}

	ASSERT(ssn == instr->nextseq);

	DTRACE_PROBE4(sctp_deliver_completemsg, sctp_t *, sctp, int, sid,
	    int, tsn, uint16_t, ssn);

	instr->nextseq = ssn + 1;

	/*
	 * Deliver any successive data chunks waiting in the instr pqueue
	 * for the data just sent up.
	 */
	while (instr->istr_nmsgs > 0) {
		dmp = (mblk_t *)instr->istr_msgs;
		dc = (sctp_data_hdr_t *)dmp->b_rptr;
		ssn = ntohs(dc->sdh_ssn);
		tsn = ntohl(dc->sdh_tsn);
		/* Stop at the first gap in the sequence */
		if (ssn != instr->nextseq)
			break;

		DTRACE_PROBE4(sctp_deliver_pqueuedmsg, sctp_t *, sctp,
		    int, sid, int, tsn, uint16_t, ssn);
		/*
		 * Ready to deliver all data before the gap
		 * to the upper layer.
		 */
		(instr->istr_nmsgs)--;
		(instr->nextseq)++;
		(sctp->sctp_istr_nmsgs)--;

		/* unlink dmp from the per-stream pending queue */
		instr->istr_msgs = instr->istr_msgs->b_next;
		if (instr->istr_msgs != NULL)
			instr->istr_msgs->b_prev = NULL;
		dmp->b_next = dmp->b_prev = NULL;

		dprint(2, ("data_chunk: pulling %x from pq (ssn %d)\n",
		    ntohl(dc->sdh_tsn), (int)ssn));

		/*
		 * Composite messages indicate this chunk was reassembled,
		 * each b_cont represents another TSN; Follow the chain to
		 * reach the frag with the last tsn in order to advance ftsn
		 * shortly by calling SCTP_ACK_IT().
		 */
		dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
		for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
			dlen += MBLKL(pmp);

		ASSERT(sctp->sctp_rxqueued >= dlen);

		sctp->sctp_rxqueued -= dlen;
		if (can_deliver) {
			dmp->b_rptr = (uchar_t *)(dc + 1);
			if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
			    ipp, ira) == 0) {
				dprint(1, ("sctp_data_chunk: delivering %lu "
				    "bytes\n", msgdsize(dmp)));
				/*
				 * Meaning of b_flag overloaded for SCTP sockfs
				 * internal use, advise sockfs of partial
				 * delivery semantics.
				 */
				dmp->b_flag = tpfinished ?
				    0 : SCTP_PARTIAL_DATA;
				if (sctp->sctp_flowctrld) {
					sctp->sctp_rwnd -= dlen;
					if (sctp->sctp_rwnd < 0)
						sctp->sctp_rwnd = 0;
				}
				if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
				    msgdsize(dmp), 0, &error, NULL) <= 0) {
					sctp->sctp_flowctrld = B_TRUE;
				}
				SCTP_ACK_IT(sctp, tsn);
			} else {
				/* don't ack, the peer will retransmit */
				freemsg(dmp);
				return;
			}
		} else {
			/* Closed above, ack and free the data */
			freemsg(dmp);
			SCTP_ACK_IT(sctp, tsn);
		}
	}

done:

	/*
	 * If there are gap reports pending, check if advancing
	 * the ftsn here closes a gap. If so, we can advance
	 * ftsn to the end of the set.
	 */
	if (sctp->sctp_sack_info != NULL &&
	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
	}
	/*
	 * If ftsn has moved forward, maybe we can remove gap reports.
	 * NB: dmp may now be NULL, so don't dereference it here.
	 */
	if (oftsn != sctp->sctp_ftsn && sctp->sctp_sack_info != NULL) {
		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
		    &sctp->sctp_sack_gaps);
		dprint(2, ("data_chunk: removed acks before %x (num=%d)\n",
		    sctp->sctp_ftsn - 1, sctp->sctp_sack_gaps));
	}

#ifdef	DEBUG
	if (sctp->sctp_sack_info != NULL) {
		ASSERT(sctp->sctp_ftsn != sctp->sctp_sack_info->begin);
	}
#endif

#undef	SCTP_ACK_IT
}
1747 
1748 void
sctp_fill_sack(sctp_t * sctp,unsigned char * dst,int sacklen)1749 sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen)
1750 {
1751 	sctp_chunk_hdr_t *sch;
1752 	sctp_sack_chunk_t *sc;
1753 	sctp_sack_frag_t *sf;
1754 	uint16_t num_gaps = sctp->sctp_sack_gaps;
1755 	sctp_set_t *sp;
1756 
1757 	/* Chunk hdr */
1758 	sch = (sctp_chunk_hdr_t *)dst;
1759 	sch->sch_id = CHUNK_SACK;
1760 	sch->sch_flags = 0;
1761 	sch->sch_len = htons(sacklen);
1762 
1763 	/* SACK chunk */
1764 	sctp->sctp_lastacked = sctp->sctp_ftsn - 1;
1765 
1766 	sc = (sctp_sack_chunk_t *)(sch + 1);
1767 	sc->ssc_cumtsn = htonl(sctp->sctp_lastacked);
1768 	if (sctp->sctp_rxqueued < sctp->sctp_rwnd) {
1769 		sc->ssc_a_rwnd = htonl(sctp->sctp_rwnd - sctp->sctp_rxqueued);
1770 	} else {
1771 		sc->ssc_a_rwnd = 0;
1772 	}
1773 	/* Remember the last window sent to peer. */
1774 	sctp->sctp_arwnd = sc->ssc_a_rwnd;
1775 	sc->ssc_numfrags = htons(num_gaps);
1776 	sc->ssc_numdups = 0;
1777 
1778 	/* lay in gap reports */
1779 	sf = (sctp_sack_frag_t *)(sc + 1);
1780 	for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1781 		uint16_t offset;
1782 
1783 		/* start */
1784 		if (sp->begin > sctp->sctp_lastacked) {
1785 			offset = (uint16_t)(sp->begin - sctp->sctp_lastacked);
1786 		} else {
1787 			/* sequence number wrap */
1788 			offset = (uint16_t)(UINT32_MAX - sctp->sctp_lastacked +
1789 			    sp->begin);
1790 		}
1791 		sf->ssf_start = htons(offset);
1792 
1793 		/* end */
1794 		if (sp->end >= sp->begin) {
1795 			offset += (uint16_t)(sp->end - sp->begin);
1796 		} else {
1797 			/* sequence number wrap */
1798 			offset += (uint16_t)(UINT32_MAX - sp->begin + sp->end);
1799 		}
1800 		sf->ssf_end = htons(offset);
1801 
1802 		sf++;
1803 		/* This is just for debugging (a la the following assertion) */
1804 		num_gaps--;
1805 	}
1806 
1807 	ASSERT(num_gaps == 0);
1808 
1809 	/* If the SACK timer is running, stop it */
1810 	if (sctp->sctp_ack_timer_running) {
1811 		sctp_timer_stop(sctp->sctp_ack_mp);
1812 		sctp->sctp_ack_timer_running = B_FALSE;
1813 	}
1814 
1815 	BUMP_LOCAL(sctp->sctp_obchunks);
1816 	BUMP_LOCAL(sctp->sctp_osacks);
1817 }
1818 
1819 mblk_t *
sctp_make_sack(sctp_t * sctp,sctp_faddr_t * sendto,mblk_t * dups)1820 sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups)
1821 {
1822 	mblk_t *smp;
1823 	size_t slen;
1824 	sctp_chunk_hdr_t *sch;
1825 	sctp_sack_chunk_t *sc;
1826 	int32_t acks_max;
1827 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1828 	uint32_t	dups_len;
1829 	sctp_faddr_t	*fp;
1830 
1831 	ASSERT(sendto != NULL);
1832 
1833 	if (sctp->sctp_force_sack) {
1834 		sctp->sctp_force_sack = 0;
1835 		goto checks_done;
1836 	}
1837 
1838 	acks_max = sctps->sctps_deferred_acks_max;
1839 	if (sctp->sctp_state == SCTPS_ESTABLISHED) {
1840 		if (sctp->sctp_sack_toggle < acks_max) {
1841 			/* no need to SACK right now */
1842 			dprint(2, ("sctp_make_sack: %p no sack (toggle)\n",
1843 			    (void *)sctp));
1844 			return (NULL);
1845 		} else if (sctp->sctp_sack_toggle >= acks_max) {
1846 			sctp->sctp_sack_toggle = 0;
1847 		}
1848 	}
1849 
1850 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1851 		dprint(2, ("sctp_make_sack: %p no sack (already)\n",
1852 		    (void *)sctp));
1853 		return (NULL);
1854 	}
1855 
1856 checks_done:
1857 	dprint(2, ("sctp_make_sack: acking %x\n", sctp->sctp_ftsn - 1));
1858 
1859 	if (dups != NULL)
1860 		dups_len = MBLKL(dups);
1861 	else
1862 		dups_len = 0;
1863 	slen = sizeof (*sch) + sizeof (*sc) +
1864 	    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1865 
1866 	/*
1867 	 * If there are error chunks, check and see if we can send the
1868 	 * SACK chunk and error chunks together in one packet.  If not,
1869 	 * send the error chunks out now.
1870 	 */
1871 	if (sctp->sctp_err_chunks != NULL) {
1872 		fp = SCTP_CHUNK_DEST(sctp->sctp_err_chunks);
1873 		if (sctp->sctp_err_len + slen + dups_len > fp->sf_pmss) {
1874 			if ((smp = sctp_make_mp(sctp, fp, 0)) == NULL) {
1875 				SCTP_KSTAT(sctps, sctp_send_err_failed);
1876 				SCTP_KSTAT(sctps, sctp_send_sack_failed);
1877 				freemsg(sctp->sctp_err_chunks);
1878 				sctp->sctp_err_chunks = NULL;
1879 				sctp->sctp_err_len = 0;
1880 				return (NULL);
1881 			}
1882 			smp->b_cont = sctp->sctp_err_chunks;
1883 			sctp_set_iplen(sctp, smp, fp->sf_ixa);
1884 			(void) conn_ip_output(smp, fp->sf_ixa);
1885 			BUMP_LOCAL(sctp->sctp_opkts);
1886 			sctp->sctp_err_chunks = NULL;
1887 			sctp->sctp_err_len = 0;
1888 		}
1889 	}
1890 	smp = sctp_make_mp(sctp, sendto, slen);
1891 	if (smp == NULL) {
1892 		SCTP_KSTAT(sctps, sctp_send_sack_failed);
1893 		return (NULL);
1894 	}
1895 	sch = (sctp_chunk_hdr_t *)smp->b_wptr;
1896 
1897 	sctp_fill_sack(sctp, smp->b_wptr, slen);
1898 	smp->b_wptr += slen;
1899 	if (dups != NULL) {
1900 		sc = (sctp_sack_chunk_t *)(sch + 1);
1901 		sc->ssc_numdups = htons(MBLKL(dups) / sizeof (uint32_t));
1902 		sch->sch_len = htons(slen + dups_len);
1903 		smp->b_cont = dups;
1904 	}
1905 
1906 	if (sctp->sctp_err_chunks != NULL) {
1907 		linkb(smp, sctp->sctp_err_chunks);
1908 		sctp->sctp_err_chunks = NULL;
1909 		sctp->sctp_err_len = 0;
1910 	}
1911 	return (smp);
1912 }
1913 
1914 /*
1915  * Check and see if we need to send a SACK chunk.  If it is needed,
1916  * send it out.  Return true if a SACK chunk is sent, false otherwise.
1917  */
1918 boolean_t
sctp_sack(sctp_t * sctp,mblk_t * dups)1919 sctp_sack(sctp_t *sctp, mblk_t *dups)
1920 {
1921 	mblk_t *smp;
1922 	sctp_stack_t	*sctps = sctp->sctp_sctps;
1923 
1924 	/* If we are shutting down, let send_shutdown() bundle the SACK */
1925 	if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
1926 		sctp_send_shutdown(sctp, 0);
1927 	}
1928 
1929 	ASSERT(sctp->sctp_lastdata != NULL);
1930 
1931 	if ((smp = sctp_make_sack(sctp, sctp->sctp_lastdata, dups)) == NULL) {
1932 		/* The caller of sctp_sack() will not free the dups mblk. */
1933 		if (dups != NULL)
1934 			freeb(dups);
1935 		return (B_FALSE);
1936 	}
1937 	dprint(2, ("sctp_sack: sending to %p %x:%x:%x:%x\n",
1938 	    (void *)sctp->sctp_lastdata,
1939 	    SCTP_PRINTADDR(sctp->sctp_lastdata->sf_faddr)));
1940 
1941 	sctp->sctp_active = LBOLT_FASTPATH64;
1942 
1943 	SCTPS_BUMP_MIB(sctps, sctpOutAck);
1944 
1945 	sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->sf_ixa);
1946 	(void) conn_ip_output(smp, sctp->sctp_lastdata->sf_ixa);
1947 	BUMP_LOCAL(sctp->sctp_opkts);
1948 	return (B_TRUE);
1949 }
1950 
1951 /*
1952  * This is called if we have a message that was partially sent and is
1953  * abandoned. The cum TSN will be the last chunk sent for this message,
1954  * subsequent chunks will be marked ABANDONED. We send a Forward TSN
1955  * chunk in this case with the TSN of the last sent chunk so that the
1956  * peer can clean up its fragment list for this message. This message
1957  * will be removed from the transmit list when the peer sends a SACK
1958  * back.
1959  */
int
sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
{
	sctp_data_hdr_t	*dh;
	mblk_t		*nmp;
	mblk_t		*head;
	int32_t		unsent = 0;
	mblk_t		*mp1 = meta->b_cont;
	uint32_t	adv_pap = sctp->sctp_adv_pap; /* saved for rollback */
	sctp_faddr_t	*fp = sctp->sctp_current;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	dh = (sctp_data_hdr_t *)mp1->b_rptr;
	if (SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn))) {
		sctp_ftsn_set_t	*sets = NULL;
		uint_t		nsets = 0;
		uint32_t	seglen = sizeof (uint32_t);
		boolean_t	ubit = SCTP_DATA_GET_UBIT(dh);

		/* Find the last chunk of this message that was sent. */
		while (mp1->b_next != NULL && SCTP_CHUNK_ISSENT(mp1->b_next))
			mp1 = mp1->b_next;
		dh = (sctp_data_hdr_t *)mp1->b_rptr;
		/* Tentatively advance the PR-SCTP ack point to that TSN. */
		sctp->sctp_adv_pap = ntohl(dh->sdh_tsn);
		/* Ordered messages need a sid-ssn set in the Forward TSN. */
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, &seglen)) {
			/* Roll back the ack point on allocation failure. */
			sctp->sctp_adv_pap = adv_pap;
			return (ENOMEM);
		}
		nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, seglen);
		sctp_free_ftsn_set(sets);
		if (nmp == NULL) {
			sctp->sctp_adv_pap = adv_pap;
			return (ENOMEM);
		}
		head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
		if (head == NULL) {
			sctp->sctp_adv_pap = adv_pap;
			freemsg(nmp);
			SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
			return (ENOMEM);
		}
		SCTP_MSG_SET_ABANDONED(meta);
		sctp_set_iplen(sctp, head, fp->sf_ixa);
		(void) conn_ip_output(head, fp->sf_ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
		if (!fp->sf_timer_running)
			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
		/*
		 * Mark the remaining (unsent) chunks of this message
		 * abandoned and subtract their payload from sctp_unsent.
		 */
		mp1 = mp1->b_next;
		while (mp1 != NULL) {
			ASSERT(!SCTP_CHUNK_ISSENT(mp1));
			ASSERT(!SCTP_CHUNK_ABANDONED(mp1));
			SCTP_ABANDON_CHUNK(mp1);
			dh = (sctp_data_hdr_t *)mp1->b_rptr;
			unsent += ntohs(dh->sdh_len) - sizeof (*dh);
			mp1 = mp1->b_next;
		}
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
		return (0);
	}
	return (-1);
}
2028 
/*
 * Process the cumulative TSN (tsn) from a received SACK: walk the transmit
 * list marking chunks up to tsn as acked, adjust each peer address'
 * outstanding byte count (sf_suna) and retransmission timer, and free
 * messages that have been completely acked.  On return, *first_unacked
 * points to the first chunk still unacked (NULL if none in view).
 * Returns the number of bytes newly acked by this SACK.
 */
uint32_t
sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked)
{
	mblk_t *ump, *nump, *mp = NULL;
	uint16_t chunklen;
	uint32_t xtsn;
	sctp_faddr_t *fp;
	sctp_data_hdr_t *sdc;
	uint32_t cumack_forward = 0;
	sctp_msg_hdr_t	*mhdr;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ump = sctp->sctp_xmit_head;

	/*
	 * Free messages only when they're completely acked.
	 */
	while (ump != NULL) {
		mhdr = (sctp_msg_hdr_t *)ump->b_rptr;
		for (mp = ump->b_cont; mp != NULL; mp = mp->b_next) {
			if (SCTP_CHUNK_ABANDONED(mp)) {
				ASSERT(SCTP_IS_MSG_ABANDONED(ump));
				mp = NULL;
				break;
			}
			/*
			 * We check for abandoned message if we are PR-SCTP
			 * aware, if this is not the first chunk in the
			 * message (b_cont) and if the message is marked
			 * abandoned.
			 */
			if (!SCTP_CHUNK_ISSENT(mp)) {
				if (sctp->sctp_prsctp_aware &&
				    mp != ump->b_cont &&
				    (SCTP_IS_MSG_ABANDONED(ump) ||
				    SCTP_MSG_TO_BE_ABANDONED(ump, mhdr,
				    sctp))) {
					(void) sctp_check_abandoned_msg(sctp,
					    ump);
				}
				goto cum_ack_done;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
			/* Skip chunks covered by a previous cumulative ack. */
			if (SEQ_GEQ(sctp->sctp_lastack_rxd, xtsn))
				continue;
			if (SEQ_GEQ(tsn, xtsn)) {
				fp = SCTP_CHUNK_DEST(mp);
				chunklen = ntohs(sdc->sdh_len);

				if (sctp->sctp_out_time != 0 &&
				    xtsn == sctp->sctp_rtt_tsn) {
					/* Got a new RTT measurement */
					sctp_update_rtt(sctp, fp,
					    ddi_get_lbolt64() -
					    sctp->sctp_out_time);
					sctp->sctp_out_time = 0;
				}
				if (SCTP_CHUNK_ISACKED(mp))
					continue;
				SCTP_CHUNK_SET_SACKCNT(mp, 0);
				SCTP_CHUNK_ACKED(mp);
				ASSERT(fp->sf_suna >= chunklen);
				fp->sf_suna -= chunklen;
				fp->sf_acked += chunklen;
				cumack_forward += chunklen;
				ASSERT(sctp->sctp_unacked >=
				    (chunklen - sizeof (*sdc)));
				sctp->sctp_unacked -=
				    (chunklen - sizeof (*sdc));
				if (fp->sf_suna == 0) {
					/* all outstanding data acked */
					fp->sf_pba = 0;
					SCTP_FADDR_TIMER_STOP(fp);
				} else {
					SCTP_FADDR_TIMER_RESTART(sctp, fp,
					    fp->sf_rto);
				}
			} else {
				goto cum_ack_done;
			}
		}
		/*
		 * Every chunk of this message is acked; unlink the message
		 * from the transmit list and free it (or notify the ULP
		 * of the failure if it was abandoned).
		 */
		nump = ump->b_next;
		if (nump != NULL)
			nump->b_prev = NULL;
		if (ump == sctp->sctp_xmit_tail)
			sctp->sctp_xmit_tail = nump;
		if (SCTP_IS_MSG_ABANDONED(ump)) {
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			ump->b_next = NULL;
			sctp_sendfail_event(sctp, ump, 0, B_TRUE);
		} else {
			sctp_free_msg(ump);
		}
		sctp->sctp_xmit_head = ump = nump;
	}
cum_ack_done:
	*first_unacked = mp;
	if (cumack_forward > 0) {
		SCTPS_BUMP_MIB(sctps, sctpInAck);
		if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
			sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
		}

		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);

		/* Time to send a shutdown? */
		if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
			sctp_send_shutdown(sctp, 0);
		}
		sctp->sctp_xmit_unacked = mp;
	} else {
		/* dup ack */
		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
	}
	/* Remember the highest cumulative ack we have processed. */
	sctp->sctp_lastack_rxd = tsn;
	if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
		sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
	ASSERT(sctp->sctp_xmit_head || sctp->sctp_unacked == 0);

	return (cumack_forward);
}
2156 
2157 static int
sctp_set_frwnd(sctp_t * sctp,uint32_t frwnd)2158 sctp_set_frwnd(sctp_t *sctp, uint32_t frwnd)
2159 {
2160 	uint32_t orwnd;
2161 
2162 	if (sctp->sctp_unacked > frwnd) {
2163 		sctp->sctp_frwnd = 0;
2164 		return (0);
2165 	}
2166 	orwnd = sctp->sctp_frwnd;
2167 	sctp->sctp_frwnd = frwnd - sctp->sctp_unacked;
2168 	if (orwnd < sctp->sctp_frwnd) {
2169 		return (1);
2170 	} else {
2171 		return (0);
2172 	}
2173 }
2174 
2175 /*
2176  * For un-ordered messages.
2177  * Walk the sctp->sctp_uo_frag list and remove any fragments with TSN
2178  * less than/equal to ftsn. Fragments for un-ordered messages are
2179  * strictly in sequence (w.r.t TSN).
2180  */
2181 static int
sctp_ftsn_check_uo_frag(sctp_t * sctp,uint32_t ftsn)2182 sctp_ftsn_check_uo_frag(sctp_t *sctp, uint32_t ftsn)
2183 {
2184 	mblk_t		*hmp;
2185 	mblk_t		*hmp_next;
2186 	sctp_data_hdr_t	*dc;
2187 	int		dlen = 0;
2188 
2189 	hmp = sctp->sctp_uo_frags;
2190 	while (hmp != NULL) {
2191 		hmp_next = hmp->b_next;
2192 		dc = (sctp_data_hdr_t *)hmp->b_rptr;
2193 		if (SEQ_GT(ntohl(dc->sdh_tsn), ftsn))
2194 			return (dlen);
2195 		sctp->sctp_uo_frags = hmp_next;
2196 		if (hmp_next != NULL)
2197 			hmp_next->b_prev = NULL;
2198 		hmp->b_next = NULL;
2199 		dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2200 		freeb(hmp);
2201 		hmp = hmp_next;
2202 	}
2203 	return (dlen);
2204 }
2205 
2206 /*
2207  * For ordered messages.
2208  * Check for existing fragments for an sid-ssn pair reported as abandoned,
2209  * hence will not receive, in the Forward TSN. If there are fragments, then
2210  * we just nuke them. If and when Partial Delivery API is supported, we
2211  * would need to send a notification to the upper layer about this.
2212  */
2213 static int
sctp_ftsn_check_frag(sctp_t * sctp,uint16_t ssn,sctp_instr_t * sip)2214 sctp_ftsn_check_frag(sctp_t *sctp, uint16_t ssn, sctp_instr_t *sip)
2215 {
2216 	sctp_reass_t	*srp;
2217 	mblk_t		*hmp;
2218 	mblk_t		*dmp;
2219 	mblk_t		*hmp_next;
2220 	sctp_data_hdr_t	*dc;
2221 	int		dlen = 0;
2222 
2223 	hmp = sip->istr_reass;
2224 	while (hmp != NULL) {
2225 		hmp_next = hmp->b_next;
2226 		srp = (sctp_reass_t *)DB_BASE(hmp);
2227 		if (SSN_GT(srp->sr_ssn, ssn))
2228 			return (dlen);
2229 		/*
2230 		 * If we had sent part of this message up, send a partial
2231 		 * delivery event. Since this is ordered delivery, we should
2232 		 * have sent partial message only for the next in sequence,
2233 		 * hence the ASSERT. See comments in sctp_data_chunk() for
2234 		 * trypartial.
2235 		 */
2236 		if (srp->sr_partial_delivered) {
2237 			if (srp->sr_ssn != sip->nextseq)
2238 				cmn_err(CE_WARN, "sctp partial"
2239 				    " delivery notify, sctp 0x%p"
2240 				    " sip = 0x%p ssn != nextseq"
2241 				    " ssn 0x%x nextseq 0x%x",
2242 				    (void *)sctp, (void *)sip,
2243 				    srp->sr_ssn, sip->nextseq);
2244 			ASSERT(sip->nextseq == srp->sr_ssn);
2245 			sctp_partial_delivery_event(sctp);
2246 		}
2247 		/* Take it out of the reass queue */
2248 		sip->istr_reass = hmp_next;
2249 		if (hmp_next != NULL)
2250 			hmp_next->b_prev = NULL;
2251 		hmp->b_next = NULL;
2252 		ASSERT(hmp->b_prev == NULL);
2253 		dmp = hmp;
2254 		ASSERT(DB_TYPE(hmp) == M_CTL);
2255 		dmp = hmp->b_cont;
2256 		hmp->b_cont = NULL;
2257 		freeb(hmp);
2258 		hmp = dmp;
2259 		while (dmp != NULL) {
2260 			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2261 			dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2262 			dmp = dmp->b_cont;
2263 		}
2264 		freemsg(hmp);
2265 		hmp = hmp_next;
2266 	}
2267 	return (dlen);
2268 }
2269 
2270 /*
2271  * Update sctp_ftsn to the cumulative TSN from the Forward TSN chunk. Remove
2272  * any SACK gaps less than the newly updated sctp_ftsn. Walk through the
2273  * sid-ssn pair in the Forward TSN and for each, clean the fragment list
2274  * for this pair, if needed, and check if we can deliver subsequent
2275  * messages, if any, from the instream queue (that were waiting for this
2276  * sid-ssn message to show up). Once we are done try to update the SACK
2277  * info. We could get a duplicate Forward TSN, in which case just send
2278  * a SACK. If any of the sid values in the Forward TSN is invalid,
2279  * send back an "Invalid Stream Identifier" error and continue processing
2280  * the rest.
2281  */
static void
sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
    ip_pkt_t *ipp, ip_recv_attr_t *ira)
{
	uint32_t	*ftsn = (uint32_t *)(ch + 1);
	ftsn_entry_t	*ftsn_entry;
	sctp_instr_t	*instr;
	boolean_t	can_deliver = B_TRUE;
	size_t		dlen;
	int		flen;
	mblk_t		*dmp;
	mblk_t		*pmp;
	sctp_data_hdr_t	*dc;
	ssize_t		remaining;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/* The new cum TSN is converted to host byte order in place. */
	*ftsn = ntohl(*ftsn);
	/* Bytes left in the chunk for the sid-ssn pair entries. */
	remaining =  ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);

	/* If the ULP has gone away, drop instead of delivering below. */
	if (SCTP_IS_DETACHED(sctp)) {
		SCTPS_BUMP_MIB(sctps, sctpInClosed);
		can_deliver = B_FALSE;
	}
	/*
	 * un-ordered messages don't have SID-SSN pair entries, we check
	 * for any fragments (for un-ordered message) to be discarded using
	 * the cumulative FTSN.
	 */
	flen = sctp_ftsn_check_uo_frag(sctp, *ftsn);
	if (flen > 0) {
		ASSERT(sctp->sctp_rxqueued >= flen);
		sctp->sctp_rxqueued -= flen;
	}
	/* Process each sid-ssn pair reported in the Forward TSN chunk. */
	ftsn_entry = (ftsn_entry_t *)(ftsn + 1);
	while (remaining >= sizeof (*ftsn_entry)) {
		ftsn_entry->ftsn_sid = ntohs(ftsn_entry->ftsn_sid);
		ftsn_entry->ftsn_ssn = ntohs(ftsn_entry->ftsn_ssn);
		/* Invalid stream id: report it and move on to the next. */
		if (ftsn_entry->ftsn_sid >= sctp->sctp_num_istr) {
			sctp_bsc_t	inval_parm;

			/* Will populate the CAUSE block in the ERROR chunk. */
			inval_parm.bsc_sid = htons(ftsn_entry->ftsn_sid);
			/* RESERVED, ignored at the receiving end */
			inval_parm.bsc_pad = 0;

			sctp_add_err(sctp, SCTP_ERR_BAD_SID,
			    (void *)&inval_parm, sizeof (sctp_bsc_t), fp);
			ftsn_entry++;
			remaining -= sizeof (*ftsn_entry);
			continue;
		}
		instr = &sctp->sctp_instr[ftsn_entry->ftsn_sid];
		flen = sctp_ftsn_check_frag(sctp, ftsn_entry->ftsn_ssn, instr);
		/* Indicates frags were nuked, update rxqueued */
		if (flen > 0) {
			ASSERT(sctp->sctp_rxqueued >= flen);
			sctp->sctp_rxqueued -= flen;
		}
		/*
		 * It is possible to receive an FTSN chunk with SSN smaller
		 * than the nextseq if this chunk is a retransmission because
		 * of incomplete processing when it was first processed.
		 */
		if (SSN_GE(ftsn_entry->ftsn_ssn, instr->nextseq))
			instr->nextseq = ftsn_entry->ftsn_ssn + 1;
		/*
		 * Deliver (or drop, if detached) queued messages on this
		 * stream that are now next in sequence.
		 */
		while (instr->istr_nmsgs > 0) {
			mblk_t	*next;

			dmp = (mblk_t *)instr->istr_msgs;
			dc = (sctp_data_hdr_t *)dmp->b_rptr;
			if (ntohs(dc->sdh_ssn) != instr->nextseq)
				break;

			next = dmp->b_next;
			/* Total payload length across the b_cont chain. */
			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
			for (pmp = dmp->b_cont; pmp != NULL;
			    pmp = pmp->b_cont) {
				dlen += MBLKL(pmp);
			}
			if (can_deliver) {
				int error;

				dmp->b_rptr = (uchar_t *)(dc + 1);
				dmp->b_next = NULL;
				ASSERT(dmp->b_prev == NULL);
				if (sctp_input_add_ancillary(sctp,
				    &dmp, dc, fp, ipp, ira) == 0) {
					sctp->sctp_rxqueued -= dlen;
					/*
					 * Override b_flag for SCTP sockfs
					 * internal use
					 */

					dmp->b_flag = 0;
					if (sctp->sctp_flowctrld) {
						sctp->sctp_rwnd -= dlen;
						if (sctp->sctp_rwnd < 0)
							sctp->sctp_rwnd = 0;
					}
					/*
					 * A non-positive return from the ULP
					 * receive entry point means it is
					 * flow controlled.
					 */
					if (sctp->sctp_ulp_recv(
					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
					    0, &error, NULL) <= 0) {
						sctp->sctp_flowctrld = B_TRUE;
					}
				} else {
					/*
					 * We will resume processing when
					 * the FTSN chunk is re-xmitted.
					 */
					dmp->b_rptr = (uchar_t *)dc;
					dmp->b_next = next;
					dprint(0,
					    ("FTSN dequeuing %u failed\n",
					    ntohs(dc->sdh_ssn)));
					return;
				}
			} else {
				sctp->sctp_rxqueued -= dlen;
				ASSERT(dmp->b_prev == NULL);
				dmp->b_next = NULL;
				freemsg(dmp);
			}
			instr->istr_nmsgs--;
			instr->nextseq++;
			sctp->sctp_istr_nmsgs--;
			if (next != NULL)
				next->b_prev = NULL;
			instr->istr_msgs = next;
		}
		ftsn_entry++;
		remaining -= sizeof (*ftsn_entry);
	}
	/* Duplicate FTSN */
	if (*ftsn <= (sctp->sctp_ftsn - 1)) {
		sctp->sctp_force_sack = 1;
		return;
	}
	/* Advance cum TSN to that reported in the Forward TSN chunk */
	sctp->sctp_ftsn = *ftsn + 1;

	/* Remove all the SACK gaps before the new cum TSN */
	if (sctp->sctp_sack_info != NULL) {
		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
		    &sctp->sctp_sack_gaps);
	}
	/*
	 * If there are gap reports pending, check if advancing
	 * the ftsn here closes a gap. If so, we can advance
	 * ftsn to the end of the set.
	 * If ftsn has moved forward, maybe we can remove gap reports.
	 */
	if (sctp->sctp_sack_info != NULL &&
	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
		    &sctp->sctp_sack_gaps);
	}
}
2440 
2441 /*
2442  * When we have processed a SACK we check to see if we can advance the
2443  * cumulative TSN if there are abandoned chunks immediately following
2444  * the updated cumulative TSN. If there are, we attempt to send a
2445  * Forward TSN chunk.
2446  */
static void
sctp_check_abandoned_data(sctp_t *sctp, sctp_faddr_t *fp)
{
	mblk_t		*meta = sctp->sctp_xmit_head;
	mblk_t		*mp;
	mblk_t		*nmp;
	uint32_t	seglen;
	uint32_t	adv_pap = sctp->sctp_adv_pap; /* saved for rollback */

	/*
	 * We only check in the first meta since otherwise we can't
	 * advance the cumulative ack point. We just look for chunks
	 * marked for retransmission, else we might prematurely
	 * send an FTSN for a sent, but unacked, chunk.
	 */
	for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
		if (!SCTP_CHUNK_ISSENT(mp))
			return;
		if (SCTP_CHUNK_WANT_REXMIT(mp))
			break;
	}
	/* No chunk marked for retransmission; nothing to do. */
	if (mp == NULL)
		return;
	/* See if the advanced peer ack point can be moved forward. */
	sctp_check_adv_ack_pt(sctp, meta, mp);
	if (SEQ_GT(sctp->sctp_adv_pap, adv_pap)) {
		/* It moved; build and send a Forward TSN chunk. */
		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
		if (nmp == NULL) {
			/* Allocation failed: roll back the ack point. */
			sctp->sctp_adv_pap = adv_pap;
			if (!fp->sf_timer_running)
				SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
			return;
		}
		sctp_set_iplen(sctp, nmp, fp->sf_ixa);
		(void) conn_ip_output(nmp, fp->sf_ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
		if (!fp->sf_timer_running)
			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
	}
}
2486 
2487 /*
2488  * The processing here follows the same logic in sctp_got_sack(), the reason
2489  * we do this separately is because, usually, gap blocks are ordered and
2490  * we can process it in sctp_got_sack(). However if they aren't we would
2491  * need to do some additional non-optimal stuff when we start processing the
2492  * unordered gaps. To that effect sctp_got_sack() does the processing in the
2493  * simple case and this does the same in the more involved case.
2494  */
static uint32_t
sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf,
    int num_gaps, mblk_t *umphead, mblk_t *mphead, int *trysend,
    boolean_t *fast_recovery, uint32_t fr_xtsn)
{
	uint32_t		xtsn;
	uint32_t		gapstart = 0;
	uint32_t		gapend = 0;
	int			gapcnt;
	uint16_t		chunklen;
	sctp_data_hdr_t		*sdc;
	int			gstart;
	mblk_t			*ump = umphead;
	mblk_t			*mp = mphead;
	sctp_faddr_t		*fp;
	uint32_t		acked = 0;
	sctp_stack_t		*sctps = sctp->sctp_sctps;

	/*
	 * gstart tracks the last (in the order of TSN) gapstart that
	 * we process in this SACK gaps walk.
	 */
	gstart = ctsn;

	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	xtsn = ntohl(sdc->sdh_tsn);
	for (gapcnt = 0; gapcnt < num_gaps; gapcnt++, ssf++) {
		if (gapstart != 0) {
			/*
			 * If we have reached the end of the transmit list or
			 * hit an unsent chunk or encountered an unordered gap
			 * block start from the ctsn again.
			 */
			if (ump == NULL || !SCTP_CHUNK_ISSENT(mp) ||
			    SEQ_LT(ctsn + ntohs(ssf->ssf_start), xtsn)) {
				ump = umphead;
				mp = mphead;
				sdc = (sctp_data_hdr_t *)mp->b_rptr;
				xtsn = ntohl(sdc->sdh_tsn);
			}
		}

		/* Gap block offsets are relative to the cumulative TSN. */
		gapstart = ctsn + ntohs(ssf->ssf_start);
		gapend = ctsn + ntohs(ssf->ssf_end);

		/*
		 * Sanity checks:
		 *
		 * 1. SACK for TSN we have not sent - ABORT
		 * 2. Invalid or spurious gaps, ignore all gaps
		 */
		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
			*trysend = -1;
			return (acked);
		} else if (SEQ_LT(gapend, gapstart) ||
		    SEQ_LEQ(gapstart, ctsn)) {
			break;
		}
		/*
		 * The xtsn can be the TSN processed for the last gap
		 * (gapend) or it could be the cumulative TSN. We continue
		 * with the last xtsn as long as the gaps are ordered, when
		 * we hit an unordered gap, we re-start from the cumulative
		 * TSN. For the first gap it is always the cumulative TSN.
		 */
		while (xtsn != gapstart) {
			/*
			 * We can't reliably check for reneged chunks
			 * when walking the unordered list, so we don't.
			 * In case the peer reneges then we will end up
			 * sending the reneged chunk via timeout.
			 */
			mp = mp->b_next;
			if (mp == NULL) {
				ump = ump->b_next;
				/*
				 * ump can't be NULL because of the sanity
				 * check above.
				 */
				ASSERT(ump != NULL);
				mp = ump->b_cont;
			}
			/*
			 * mp can't be unsent because of the sanity check
			 * above.
			 */
			ASSERT(SCTP_CHUNK_ISSENT(mp));
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
		/*
		 * Now that we have found the chunk with TSN == 'gapstart',
		 * let's walk till we hit the chunk with TSN == 'gapend'.
		 * All intermediate chunks will be marked ACKED, if they
		 * haven't already been.
		 */
		while (SEQ_LEQ(xtsn, gapend)) {
			/*
			 * SACKed
			 */
			SCTP_CHUNK_SET_SACKCNT(mp, 0);
			if (!SCTP_CHUNK_ISACKED(mp)) {
				SCTP_CHUNK_ACKED(mp);

				/* Newly acked: adjust per-peer accounting. */
				fp = SCTP_CHUNK_DEST(mp);
				chunklen = ntohs(sdc->sdh_len);
				ASSERT(fp->sf_suna >= chunklen);
				fp->sf_suna -= chunklen;
				if (fp->sf_suna == 0) {
					/* All outstanding data acked. */
					fp->sf_pba = 0;
					SCTP_FADDR_TIMER_STOP(fp);
				}
				fp->sf_acked += chunklen;
				acked += chunklen;
				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
				ASSERT(sctp->sctp_unacked >= 0);
			}
			/*
			 * Move to the next message in the transmit list
			 * if we are done with all the chunks from the current
			 * message. Note, it is possible to hit the end of the
			 * transmit list here, i.e. if we have already completed
			 * processing the gap block.
			 */
			mp = mp->b_next;
			if (mp == NULL) {
				ump = ump->b_next;
				if (ump == NULL) {
					ASSERT(xtsn == gapend);
					break;
				}
				mp = ump->b_cont;
			}
			/*
			 * Likewise, we can hit an unsent chunk once we have
			 * completed processing the gap block.
			 */
			if (!SCTP_CHUNK_ISSENT(mp)) {
				ASSERT(xtsn == gapend);
				break;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
		/*
		 * We keep track of the last gap we successfully processed
		 * so that we can terminate the walk below for incrementing
		 * the SACK count.
		 */
		if (SEQ_LT(gstart, gapstart))
			gstart = gapstart;
	}
	/*
	 * Check if have incremented the SACK count for all unacked TSNs in
	 * sctp_got_sack(), if so we are done.
	 */
	if (SEQ_LEQ(gstart, fr_xtsn))
		return (acked);

	/*
	 * Second pass: bump the SACK count of the still-unacked chunks
	 * below the last processed gap start, triggering fast retransmit
	 * (and entry into fast recovery) at the configured threshold.
	 */
	ump = umphead;
	mp = mphead;
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	xtsn = ntohl(sdc->sdh_tsn);
	while (SEQ_LT(xtsn, gstart)) {
		/*
		 * We have incremented SACK count for TSNs less than fr_tsn
		 * in sctp_got_sack(), so don't increment them again here.
		 */
		if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) {
			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
			if (SCTP_CHUNK_SACKCNT(mp) ==
			    sctps->sctps_fast_rxt_thresh) {
				SCTP_CHUNK_REXMIT(sctp, mp);
				sctp->sctp_chk_fast_rexmit = B_TRUE;
				*trysend = 1;
				if (!*fast_recovery) {
					/*
					 * Entering fast recovery.
					 */
					fp = SCTP_CHUNK_DEST(mp);
					fp->sf_ssthresh = fp->sf_cwnd / 2;
					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
						fp->sf_ssthresh =
						    2 * fp->sf_pmss;
					}
					fp->sf_cwnd = fp->sf_ssthresh;
					fp->sf_pba = 0;
					sctp->sctp_recovery_tsn =
					    sctp->sctp_ltsn - 1;
					*fast_recovery = B_TRUE;
				}
			}
		}
		mp = mp->b_next;
		if (mp == NULL) {
			ump = ump->b_next;
			/* We can't get to the end of the transmit list here */
			ASSERT(ump != NULL);
			mp = ump->b_cont;
		}
		/* We can't hit an unsent chunk here */
		ASSERT(SCTP_CHUNK_ISSENT(mp));
		sdc = (sctp_data_hdr_t *)mp->b_rptr;
		xtsn = ntohl(sdc->sdh_tsn);
	}
	return (acked);
}
2705 
2706 static int
sctp_got_sack(sctp_t * sctp,sctp_chunk_hdr_t * sch)2707 sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch)
2708 {
2709 	sctp_sack_chunk_t	*sc;
2710 	sctp_data_hdr_t		*sdc;
2711 	sctp_sack_frag_t	*ssf;
2712 	mblk_t			*ump;
2713 	mblk_t			*mp;
2714 	mblk_t			*mp1;
2715 	uint32_t		cumtsn;
2716 	uint32_t		xtsn;
2717 	uint32_t		gapstart = 0;
2718 	uint32_t		gapend = 0;
2719 	uint32_t		acked = 0;
2720 	uint16_t		chunklen;
2721 	sctp_faddr_t		*fp;
2722 	int			num_gaps;
2723 	int			trysend = 0;
2724 	int			i;
2725 	boolean_t		fast_recovery = B_FALSE;
2726 	boolean_t		cumack_forward = B_FALSE;
2727 	boolean_t		fwd_tsn = B_FALSE;
2728 	sctp_stack_t		*sctps = sctp->sctp_sctps;
2729 
2730 	BUMP_LOCAL(sctp->sctp_ibchunks);
2731 	BUMP_LOCAL(sctp->sctp_isacks);
2732 	chunklen = ntohs(sch->sch_len);
2733 	if (chunklen < (sizeof (*sch) + sizeof (*sc)))
2734 		return (0);
2735 
2736 	sc = (sctp_sack_chunk_t *)(sch + 1);
2737 	cumtsn = ntohl(sc->ssc_cumtsn);
2738 
2739 	dprint(2, ("got sack cumtsn %x -> %x\n", sctp->sctp_lastack_rxd,
2740 	    cumtsn));
2741 
2742 	/* out of order */
2743 	if (SEQ_LT(cumtsn, sctp->sctp_lastack_rxd))
2744 		return (0);
2745 
2746 	if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
2747 		SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2748 		/* Send an ABORT */
2749 		return (-1);
2750 	}
2751 
2752 	/*
2753 	 * Cwnd only done when not in fast recovery mode.
2754 	 */
2755 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn))
2756 		fast_recovery = B_TRUE;
2757 
2758 	/*
2759 	 * .. and if the cum TSN is not moving ahead on account Forward TSN
2760 	 */
2761 	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_adv_pap))
2762 		fwd_tsn = B_TRUE;
2763 
2764 	if (cumtsn == sctp->sctp_lastack_rxd &&
2765 	    (sctp->sctp_xmit_unacked == NULL ||
2766 	    !SCTP_CHUNK_ABANDONED(sctp->sctp_xmit_unacked))) {
2767 		if (sctp->sctp_xmit_unacked != NULL)
2768 			mp = sctp->sctp_xmit_unacked;
2769 		else if (sctp->sctp_xmit_head != NULL)
2770 			mp = sctp->sctp_xmit_head->b_cont;
2771 		else
2772 			mp = NULL;
2773 		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
2774 		/*
2775 		 * If we were doing a zero win probe and the win
2776 		 * has now opened to at least MSS, re-transmit the
2777 		 * zero win probe via sctp_rexmit_packet().
2778 		 */
2779 		if (mp != NULL && sctp->sctp_zero_win_probe &&
2780 		    ntohl(sc->ssc_a_rwnd) >= sctp->sctp_current->sf_pmss) {
2781 			mblk_t	*pkt;
2782 			uint_t	pkt_len;
2783 			mblk_t	*mp1 = mp;
2784 			mblk_t	*meta = sctp->sctp_xmit_head;
2785 
2786 			/*
2787 			 * Reset the RTO since we have been backing-off
2788 			 * to send the ZWP.
2789 			 */
2790 			fp = sctp->sctp_current;
2791 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2792 			SCTP_MAX_RTO(sctp, fp);
2793 			/* Resend the ZWP */
2794 			pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp,
2795 			    &pkt_len);
2796 			if (pkt == NULL) {
2797 				SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2798 				return (0);
2799 			}
2800 			ASSERT(pkt_len <= fp->sf_pmss);
2801 			sctp->sctp_zero_win_probe = B_FALSE;
2802 			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2803 			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2804 			sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2805 			(void) conn_ip_output(pkt, fp->sf_ixa);
2806 			BUMP_LOCAL(sctp->sctp_opkts);
2807 		}
2808 	} else {
2809 		if (sctp->sctp_zero_win_probe) {
2810 			/*
2811 			 * Reset the RTO since we have been backing-off
2812 			 * to send the ZWP.
2813 			 */
2814 			fp = sctp->sctp_current;
2815 			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2816 			SCTP_MAX_RTO(sctp, fp);
2817 			sctp->sctp_zero_win_probe = B_FALSE;
2818 			/* This is probably not required */
2819 			if (!sctp->sctp_rexmitting) {
2820 				sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2821 				sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2822 			}
2823 		}
2824 		acked = sctp_cumack(sctp, cumtsn, &mp);
2825 		sctp->sctp_xmit_unacked = mp;
2826 		if (acked > 0) {
2827 			trysend = 1;
2828 			cumack_forward = B_TRUE;
2829 			if (fwd_tsn && SEQ_GEQ(sctp->sctp_lastack_rxd,
2830 			    sctp->sctp_adv_pap)) {
2831 				cumack_forward = B_FALSE;
2832 			}
2833 		}
2834 	}
2835 	num_gaps = ntohs(sc->ssc_numfrags);
2836 	UPDATE_LOCAL(sctp->sctp_gapcnt, num_gaps);
2837 	if (num_gaps == 0 || mp == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2838 	    chunklen < (sizeof (*sch) + sizeof (*sc) +
2839 	    num_gaps * sizeof (*ssf))) {
2840 		goto ret;
2841 	}
2842 #ifdef	DEBUG
2843 	/*
2844 	 * Since we delete any message that has been acked completely,
2845 	 * the unacked chunk must belong to sctp_xmit_head (as
2846 	 * we don't have a back pointer from the mp to the meta data
2847 	 * we do this).
2848 	 */
2849 	{
2850 		mblk_t	*mp2 = sctp->sctp_xmit_head->b_cont;
2851 
2852 		while (mp2 != NULL) {
2853 			if (mp2 == mp)
2854 				break;
2855 			mp2 = mp2->b_next;
2856 		}
2857 		ASSERT(mp2 != NULL);
2858 	}
2859 #endif
2860 	ump = sctp->sctp_xmit_head;
2861 
2862 	/*
2863 	 * Just remember where we started from, in case we need to call
2864 	 * sctp_process_uo_gaps() if the gap blocks are unordered.
2865 	 */
	mp1 = mp;

	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	xtsn = ntohl(sdc->sdh_tsn);
	/* mp must be the first chunk past the new cumulative TSN. */
	ASSERT(xtsn == cumtsn + 1);

	/*
	 * Go through SACK gaps. They are ordered based on start TSN.
	 */
	ssf = (sctp_sack_frag_t *)(sc + 1);
	for (i = 0; i < num_gaps; i++, ssf++) {
		if (gapstart != 0) {
			/*
			 * Check for an unordered gap: gapstart is non-zero
			 * once the first gap block has been handled, so a
			 * start TSN at or below the previous block's start
			 * means the remaining blocks are not in ascending
			 * order; hand them to sctp_process_uo_gaps().
			 */
			if (SEQ_LEQ(cumtsn + ntohs(ssf->ssf_start), gapstart)) {
				acked += sctp_process_uo_gaps(sctp,
				    cumtsn, ssf, num_gaps - i,
				    sctp->sctp_xmit_head, mp1,
				    &trysend, &fast_recovery, gapstart);
				if (trysend < 0) {
					SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
					return (-1);
				}
				break;
			}
		}
		/* Gap block boundaries are 16-bit offsets from the cum TSN. */
		gapstart = cumtsn + ntohs(ssf->ssf_start);
		gapend = cumtsn + ntohs(ssf->ssf_end);

		/*
		 * Sanity checks:
		 *
		 * 1. SACK for TSN we have not sent - ABORT
		 * 2. Invalid or spurious gaps, ignore all gaps
		 */
		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
			return (-1);
		} else if (SEQ_LT(gapend, gapstart) ||
		    SEQ_LEQ(gapstart, cumtsn)) {
			break;
		}
		/*
		 * Let's start at the current TSN (for the 1st gap we start
		 * from the cumulative TSN, for subsequent ones we start from
		 * where the previous gapend was found - second while loop
		 * below) and walk the transmit list till we find the TSN
		 * corresponding to gapstart. All the unacked chunks till we
		 * get to the chunk with TSN == gapstart will have their
		 * SACKCNT incremented by 1. Note since the gap blocks are
		 * ordered, we won't be incrementing the SACKCNT for an
		 * unacked chunk by more than one while processing the gap
		 * blocks. If the SACKCNT for any unacked chunk exceeds
		 * the fast retransmit threshold, we will fast retransmit
		 * after processing all the gap blocks.
		 */
		ASSERT(SEQ_LEQ(xtsn, gapstart));
		while (xtsn != gapstart) {
			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
			if (SCTP_CHUNK_SACKCNT(mp) ==
			    sctps->sctps_fast_rxt_thresh) {
				SCTP_CHUNK_REXMIT(sctp, mp);
				sctp->sctp_chk_fast_rexmit = B_TRUE;
				trysend = 1;
				if (!fast_recovery) {
					/*
					 * Entering fast recovery: halve
					 * cwnd into ssthresh (floored at
					 * 2 * PMSS) and restart cwnd from
					 * ssthresh.
					 */
					fp = SCTP_CHUNK_DEST(mp);
					fp->sf_ssthresh = fp->sf_cwnd / 2;
					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
						fp->sf_ssthresh =
						    2 * fp->sf_pmss;
					}
					fp->sf_cwnd = fp->sf_ssthresh;
					fp->sf_pba = 0;
					sctp->sctp_recovery_tsn =
					    sctp->sctp_ltsn - 1;
					fast_recovery = B_TRUE;
				}
			}

			/*
			 * Peer may have reneged on this chunk, so un-sack
			 * it now. If the peer did renege, we need to
			 * readjust unacked.
			 */
			if (SCTP_CHUNK_ISACKED(mp)) {
				chunklen = ntohs(sdc->sdh_len);
				fp = SCTP_CHUNK_DEST(mp);
				fp->sf_suna += chunklen;
				sctp->sctp_unacked += chunklen - sizeof (*sdc);
				SCTP_CHUNK_CLEAR_ACKED(sctp, mp);
				if (!fp->sf_timer_running) {
					SCTP_FADDR_TIMER_RESTART(sctp, fp,
					    fp->sf_rto);
				}
			}

			mp = mp->b_next;
			if (mp == NULL) {
				ump = ump->b_next;
				/*
				 * ump can't be NULL given the sanity check
				 * above.  But if it is NULL, it means that
				 * there is a data corruption.  We'd better
				 * panic.
				 */
				if (ump == NULL) {
					panic("Memory corruption detected: gap "
					    "start TSN 0x%x missing from the "
					    "xmit list: %p", gapstart,
					    (void *)sctp);
				}
				mp = ump->b_cont;
			}
			/*
			 * mp can't be unsent given the sanity check above.
			 */
			ASSERT(SCTP_CHUNK_ISSENT(mp));
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
		/*
		 * Now that we have found the chunk with TSN == 'gapstart',
		 * let's walk till we hit the chunk with TSN == 'gapend'.
		 * All intermediate chunks will be marked ACKED, if they
		 * haven't already been.
		 */
		while (SEQ_LEQ(xtsn, gapend)) {
			/*
			 * SACKed
			 */
			SCTP_CHUNK_SET_SACKCNT(mp, 0);
			if (!SCTP_CHUNK_ISACKED(mp)) {
				SCTP_CHUNK_ACKED(mp);

				fp = SCTP_CHUNK_DEST(mp);
				chunklen = ntohs(sdc->sdh_len);
				ASSERT(fp->sf_suna >= chunklen);
				fp->sf_suna -= chunklen;
				if (fp->sf_suna == 0) {
					/* All outstanding data acked. */
					fp->sf_pba = 0;
					SCTP_FADDR_TIMER_STOP(fp);
				}
				fp->sf_acked += chunklen;
				acked += chunklen;
				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
				ASSERT(sctp->sctp_unacked >= 0);
			}
			/* Go to the next chunk of the current message */
			mp = mp->b_next;
			/*
			 * Move to the next message in the transmit list
			 * if we are done with all the chunks from the current
			 * message. Note, it is possible to hit the end of the
			 * transmit list here, i.e. if we have already completed
			 * processing the gap block.  But the TSN must be equal
			 * to the gapend because of the above sanity check.
			 * If it is not equal, it means that some data is
			 * missing.
			 * Also, note that we break here, which means we
			 * continue processing gap blocks, if any. In case of
			 * ordered gap blocks there can't be any following
			 * this (if there is it will fail the sanity check
			 * above). In case of un-ordered gap blocks we will
			 * switch to sctp_process_uo_gaps().  In either case
			 * it should be fine to continue with NULL ump/mp,
			 * but we just reset it to xmit_head.
			 */
			if (mp == NULL) {
				ump = ump->b_next;
				if (ump == NULL) {
					if (xtsn != gapend) {
						panic("Memory corruption "
						    "detected: gap end TSN "
						    "0x%x missing from the "
						    "xmit list: %p", gapend,
						    (void *)sctp);
					}
					ump = sctp->sctp_xmit_head;
					mp = mp1;
					sdc = (sctp_data_hdr_t *)mp->b_rptr;
					xtsn = ntohl(sdc->sdh_tsn);
					break;
				}
				mp = ump->b_cont;
			}
			/*
			 * Likewise, we could hit an unsent chunk once we have
			 * completed processing the gap block. Again, it is
			 * fine to continue processing gap blocks with mp
			 * pointing to the unsent chunk, because if there
			 * are more ordered gap blocks, they will fail the
			 * sanity check, and if there are un-ordered gap blocks,
			 * we will continue processing in sctp_process_uo_gaps().
			 * We just reset the mp to the one we started with.
			 */
			if (!SCTP_CHUNK_ISSENT(mp)) {
				ASSERT(xtsn == gapend);
				ump = sctp->sctp_xmit_head;
				mp = mp1;
				sdc = (sctp_data_hdr_t *)mp->b_rptr;
				xtsn = ntohl(sdc->sdh_tsn);
				break;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
	}
	/* If PR-SCTP is in effect, check for abandoned data. */
	if (sctp->sctp_prsctp_aware)
		sctp_check_abandoned_data(sctp, sctp->sctp_current);
	/* A chunk hit the fast-rxt threshold above; retransmit now. */
	if (sctp->sctp_chk_fast_rexmit)
		sctp_fast_rexmit(sctp);
ret:
	trysend += sctp_set_frwnd(sctp, ntohl(sc->ssc_a_rwnd));

	/*
	 * If the receive window is closed while there is unsent data and
	 * nothing is outstanding (so no retransmit timer is running), set
	 * a timer for doing zero window probes.
	 */
	if (sctp->sctp_frwnd == 0 && sctp->sctp_unacked == 0 &&
	    sctp->sctp_unsent != 0) {
		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
		    sctp->sctp_current->sf_rto);
	}

	/*
	 * Set cwnd for all destinations.
	 * Congestion window gets increased only when cumulative
	 * TSN moves forward, we're not in fast recovery, and
	 * cwnd has been fully utilized (almost fully, need to allow
	 * some leeway due to non-MSS sized messages).
	 */
	if (sctp->sctp_current->sf_acked == acked) {
		/*
		 * Fast-path, only data sent to sctp_current got acked.
		 */
		fp = sctp->sctp_current;
		if (cumack_forward && !fast_recovery &&
		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
			if (fp->sf_cwnd < fp->sf_ssthresh) {
				/*
				 * Slow start: grow cwnd by at most one
				 * PMSS per SACK that advances the ack
				 * point.
				 */
				if (fp->sf_acked > fp->sf_pmss) {
					fp->sf_cwnd += fp->sf_pmss;
				} else {
					fp->sf_cwnd += fp->sf_acked;
				}
				fp->sf_cwnd = MIN(fp->sf_cwnd,
				    sctp->sctp_cwnd_max);
			} else {
				/*
				 * Congestion avoidance: accumulate acked
				 * bytes in sf_pba and grow cwnd by one
				 * PMSS per cwnd's worth of acked data.
				 */
				fp->sf_pba += fp->sf_acked;
				if (fp->sf_pba >= fp->sf_cwnd) {
					fp->sf_pba -= fp->sf_cwnd;
					fp->sf_cwnd += fp->sf_pmss;
					fp->sf_cwnd = MIN(fp->sf_cwnd,
					    sctp->sctp_cwnd_max);
				}
			}
		}
		/*
		 * Limit the burst of transmitted data segments.
		 */
		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
		    fp->sf_cwnd) {
			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
			    fp->sf_pmss;
		}
		fp->sf_acked = 0;
3141 		goto