xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_frag.c (revision f4b3ec61)
1 /*
2  * Copyright (C) 1993-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #ifdef __hpux
24 # include <sys/timeout.h>
25 #endif
26 #if !defined(_KERNEL)
27 # include <stdio.h>
28 # include <string.h>
29 # include <stdlib.h>
30 # define _KERNEL
31 # ifdef __OpenBSD__
32 struct file;
33 # endif
34 # include <sys/uio.h>
35 # undef _KERNEL
36 #endif
37 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
38 # include <sys/filio.h>
39 # include <sys/fcntl.h>
40 #else
41 # include <sys/ioctl.h>
42 #endif
43 #if !defined(linux)
44 # include <sys/protosw.h>
45 #endif
46 #include <sys/socket.h>
47 #if defined(_KERNEL)
48 # include <sys/systm.h>
49 # if !defined(__SVR4) && !defined(__svr4__)
50 #  include <sys/mbuf.h>
51 # endif
52 #endif
53 #if !defined(__SVR4) && !defined(__svr4__)
54 # if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
55 #  include <sys/kernel.h>
56 # endif
57 #else
58 # include <sys/byteorder.h>
59 # ifdef _KERNEL
60 #  include <sys/dditypes.h>
61 # endif
62 # include <sys/stream.h>
63 # include <sys/kmem.h>
64 #endif
65 #include <net/if.h>
66 #ifdef sun
67 # include <net/af.h>
68 #endif
69 #include <net/route.h>
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/ip.h>
73 #if !defined(linux)
74 # include <netinet/ip_var.h>
75 #endif
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 #include <netinet/ip_icmp.h>
79 #include "netinet/ip_compat.h"
80 #include <netinet/tcpip.h>
81 #include "netinet/ip_fil.h"
82 #include "netinet/ip_nat.h"
83 #include "netinet/ip_frag.h"
84 #include "netinet/ip_state.h"
85 #include "netinet/ip_auth.h"
86 #include "netinet/ipf_stack.h"
87 #if (__FreeBSD_version >= 300000)
88 # include <sys/malloc.h>
89 # if defined(_KERNEL)
90 #  ifndef IPFILTER_LKM
91 #   include <sys/libkern.h>
92 #   include <sys/systm.h>
93 #  endif
94 extern struct callout_handle fr_slowtimer_ch;
95 # endif
96 #endif
97 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
98 # include <sys/callout.h>
99 extern struct callout fr_slowtimer_ch;
100 #endif
101 #if defined(__OpenBSD__)
102 # include <sys/timeout.h>
103 extern struct timeout fr_slowtimer_ch;
104 #endif
105 /* END OF INCLUDES */
106 
107 #if !defined(lint)
108 static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
109 static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $";
110 #endif
111 
/*
 * Forward declarations of the helpers private to this file.  The three
 * fragment caches (filter, NAT and IP ID) share these routines and differ
 * only in the hash table and list tail pointer that are passed in.
 */
static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
static void fr_fragdelete __P((ipfr_t *, ipfr_t ***, ipf_stack_t *));
115 
116 /* ------------------------------------------------------------------------ */
117 /* Function:    fr_fraginit                                                 */
118 /* Returns:     int - 0 == success, -1 == error                             */
119 /* Parameters:  Nil                                                         */
120 /*                                                                          */
121 /* Initialise the hash tables for the fragment cache lookups.               */
122 /* ------------------------------------------------------------------------ */
123 int fr_fraginit(ifs)
124 ipf_stack_t *ifs;
125 {
126 	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
127 	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
128 	ifs->ifs_ipfr_ipidtail = &ifs->ifs_ipfr_ipidlist;
129 	ifs->ifs_ipfr_size = IPFT_SIZE;
130 	ifs->ifs_fr_ipfrttl = 120;	/* 60 seconds */
131 
132 	KMALLOCS(ifs->ifs_ipfr_heads, ipfr_t **,
133 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
134 	if (ifs->ifs_ipfr_heads == NULL)
135 		return -1;
136 	bzero((char *)ifs->ifs_ipfr_heads,
137 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
138 
139 	KMALLOCS(ifs->ifs_ipfr_nattab, ipfr_t **,
140 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
141 	if (ifs->ifs_ipfr_nattab == NULL)
142 		return -1;
143 	bzero((char *)ifs->ifs_ipfr_nattab,
144 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
145 
146 	KMALLOCS(ifs->ifs_ipfr_ipidtab, ipfr_t **,
147 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
148 	if (ifs->ifs_ipfr_ipidtab == NULL)
149 		return -1;
150 	bzero((char *)ifs->ifs_ipfr_ipidtab,
151 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
152 
153 	RWLOCK_INIT(&ifs->ifs_ipf_frag, "ipf fragment rwlock");
154 
155 	/* Initialise frblock with "block in all" */
156 	bzero((char *)&ifs->ifs_frblock, sizeof(ifs->ifs_frblock));
157 	ifs->ifs_frblock.fr_flags = FR_BLOCK|FR_INQUE;	/* block in */
158 	ifs->ifs_frblock.fr_ref = 1;
159 
160 	ifs->ifs_fr_frag_init = 1;
161 
162 	return 0;
163 }
164 
165 
166 /* ------------------------------------------------------------------------ */
167 /* Function:    fr_fragunload                                               */
168 /* Returns:     Nil                                                         */
169 /* Parameters:  Nil                                                         */
170 /*                                                                          */
171 /* Free all memory allocated whilst running and from initialisation.        */
172 /* ------------------------------------------------------------------------ */
173 void fr_fragunload(ifs)
174 ipf_stack_t *ifs;
175 {
176 	if (ifs->ifs_fr_frag_init == 1) {
177 		fr_fragclear(ifs);
178 
179 		RW_DESTROY(&ifs->ifs_ipf_frag);
180 		ifs->ifs_fr_frag_init = 0;
181 	}
182 
183 	if (ifs->ifs_ipfr_heads != NULL) {
184 		KFREES(ifs->ifs_ipfr_heads,
185 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
186 	}
187 	ifs->ifs_ipfr_heads = NULL;
188 
189 	if (ifs->ifs_ipfr_nattab != NULL) {
190 		KFREES(ifs->ifs_ipfr_nattab,
191 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
192 	}
193 	ifs->ifs_ipfr_nattab = NULL;
194 
195 	if (ifs->ifs_ipfr_ipidtab != NULL) {
196 		KFREES(ifs->ifs_ipfr_ipidtab,
197 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
198 	}
199 	ifs->ifs_ipfr_ipidtab = NULL;
200 }
201 
202 
203 /* ------------------------------------------------------------------------ */
204 /* Function:    fr_fragstats                                                */
205 /* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
206 /* Parameters:  Nil                                                         */
207 /*                                                                          */
208 /* Updates ipfr_stats with current information and returns a pointer to it  */
209 /* ------------------------------------------------------------------------ */
210 ipfrstat_t *fr_fragstats(ifs)
211 ipf_stack_t *ifs;
212 {
213 	ifs->ifs_ipfr_stats.ifs_table = ifs->ifs_ipfr_heads;
214 	ifs->ifs_ipfr_stats.ifs_nattab = ifs->ifs_ipfr_nattab;
215 	ifs->ifs_ipfr_stats.ifs_inuse = ifs->ifs_ipfr_inuse;
216 	return &ifs->ifs_ipfr_stats;
217 }
218 
219 
220 /* ------------------------------------------------------------------------ */
221 /* Function:    ipfr_newfrag                                                */
222 /* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
223 /* Parameters:  fin(I)   - pointer to packet information                    */
224 /*              table(I) - pointer to frag table to add to                  */
225 /*                                                                          */
226 /* Add a new entry to the fragment cache, registering it as having come     */
227 /* through this box, with the result of the filter operation.               */
228 /* ------------------------------------------------------------------------ */
229 static ipfr_t *ipfr_newfrag(fin, pass, table)
230 fr_info_t *fin;
231 u_32_t pass;
232 ipfr_t *table[];
233 {
234 	ipfr_t *fra, frag;
235 	u_int idx, off;
236 	ipf_stack_t *ifs = fin->fin_ifs;
237 
238 	if (ifs->ifs_ipfr_inuse >= IPFT_SIZE)
239 		return NULL;
240 
241 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
242 		return NULL;
243 
244 	if (pass & FR_FRSTRICT)
245 		if (fin->fin_off != 0)
246 			return NULL;
247 
248 	frag.ipfr_p = fin->fin_p;
249 	idx = fin->fin_p;
250 	frag.ipfr_id = fin->fin_id;
251 	idx += fin->fin_id;
252 	frag.ipfr_source = fin->fin_fi.fi_src;
253 	idx += frag.ipfr_src.s_addr;
254 	frag.ipfr_dest = fin->fin_fi.fi_dst;
255 	idx += frag.ipfr_dst.s_addr;
256 	frag.ipfr_ifp = fin->fin_ifp;
257 	idx *= 127;
258 	idx %= IPFT_SIZE;
259 
260 	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
261 	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
262 	frag.ipfr_auth = fin->fin_fi.fi_auth;
263 
264 	/*
265 	 * first, make sure it isn't already there...
266 	 */
267 	for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext)
268 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp,
269 			  IPFR_CMPSZ)) {
270 			ifs->ifs_ipfr_stats.ifs_exists++;
271 			return NULL;
272 		}
273 
274 	/*
275 	 * allocate some memory, if possible, if not, just record that we
276 	 * failed to do so.
277 	 */
278 	KMALLOC(fra, ipfr_t *);
279 	if (fra == NULL) {
280 		ifs->ifs_ipfr_stats.ifs_nomem++;
281 		return NULL;
282 	}
283 
284 	fra->ipfr_rule = fin->fin_fr;
285 	if (fra->ipfr_rule != NULL) {
286 
287 		frentry_t *fr;
288 
289 		fr = fin->fin_fr;
290 		MUTEX_ENTER(&fr->fr_lock);
291 		fr->fr_ref++;
292 		MUTEX_EXIT(&fr->fr_lock);
293 	}
294 
295 	/*
296 	 * Insert the fragment into the fragment table, copy the struct used
297 	 * in the search using bcopy rather than reassign each field.
298 	 * Set the ttl to the default.
299 	 */
300 	if ((fra->ipfr_hnext = table[idx]) != NULL)
301 		table[idx]->ipfr_hprev = &fra->ipfr_hnext;
302 	fra->ipfr_hprev = table + idx;
303 	fra->ipfr_data = NULL;
304 	table[idx] = fra;
305 	bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ);
306 	fra->ipfr_ttl = ifs->ifs_fr_ticks + ifs->ifs_fr_ipfrttl;
307 
308 	/*
309 	 * Compute the offset of the expected start of the next packet.
310 	 */
311 	off = fin->fin_off;
312 	if (off == 0) {
313 		fra->ipfr_seen0 = 1;
314 		fra->ipfr_firstend = fin->fin_flen;
315 	} else {
316 		fra->ipfr_seen0 = 0;
317 		fra->ipfr_firstend = 0;
318 	}
319 	fra->ipfr_off = off + fin->fin_dlen;
320 	fra->ipfr_pass = pass;
321 	fra->ipfr_ref = 1;
322 	ifs->ifs_ipfr_stats.ifs_new++;
323 	ifs->ifs_ipfr_inuse++;
324 	return fra;
325 }
326 
327 
328 /* ------------------------------------------------------------------------ */
329 /* Function:    fr_newfrag                                                  */
330 /* Returns:     int - 0 == success, -1 == error                             */
331 /* Parameters:  fin(I)  - pointer to packet information                     */
332 /*                                                                          */
333 /* Add a new entry to the fragment cache table based on the current packet  */
334 /* ------------------------------------------------------------------------ */
335 int fr_newfrag(fin, pass)
336 u_32_t pass;
337 fr_info_t *fin;
338 {
339 	ipfr_t	*fra;
340 	ipf_stack_t *ifs = fin->fin_ifs;
341 
342 	if (ifs->ifs_fr_frag_lock != 0)
343 		return -1;
344 
345 	WRITE_ENTER(&ifs->ifs_ipf_frag);
346 	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_heads);
347 	if (fra != NULL) {
348 		*ifs->ifs_ipfr_tail = fra;
349 		fra->ipfr_prev = ifs->ifs_ipfr_tail;
350 		ifs->ifs_ipfr_tail = &fra->ipfr_next;
351 		if (ifs->ifs_ipfr_list == NULL)
352 			ifs->ifs_ipfr_list = fra;
353 		fra->ipfr_next = NULL;
354 	}
355 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
356 	return fra ? 0 : -1;
357 }
358 
359 
360 /* ------------------------------------------------------------------------ */
361 /* Function:    fr_nat_newfrag                                              */
362 /* Returns:     int - 0 == success, -1 == error                             */
363 /* Parameters:  fin(I)  - pointer to packet information                     */
364 /*              nat(I)  - pointer to NAT structure                          */
365 /*                                                                          */
366 /* Create a new NAT fragment cache entry based on the current packet and    */
367 /* the NAT structure for this "session".                                    */
368 /* ------------------------------------------------------------------------ */
369 int fr_nat_newfrag(fin, pass, nat)
370 fr_info_t *fin;
371 u_32_t pass;
372 nat_t *nat;
373 {
374 	ipfr_t	*fra;
375 	ipf_stack_t *ifs = fin->fin_ifs;
376 
377 	if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock != 0))
378 		return 0;
379 
380 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
381 	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_nattab);
382 	if (fra != NULL) {
383 		fra->ipfr_data = nat;
384 		nat->nat_data = fra;
385 		*ifs->ifs_ipfr_nattail = fra;
386 		fra->ipfr_prev = ifs->ifs_ipfr_nattail;
387 		ifs->ifs_ipfr_nattail = &fra->ipfr_next;
388 		fra->ipfr_next = NULL;
389 	}
390 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
391 	return fra ? 0 : -1;
392 }
393 
394 
395 /* ------------------------------------------------------------------------ */
396 /* Function:    fr_ipid_newfrag                                             */
397 /* Returns:     int - 0 == success, -1 == error                             */
398 /* Parameters:  fin(I)  - pointer to packet information                     */
399 /*              ipid(I) - new IP ID for this fragmented packet              */
400 /*                                                                          */
401 /* Create a new fragment cache entry for this packet and store, as a data   */
402 /* pointer, the new IP ID value.                                            */
403 /* ------------------------------------------------------------------------ */
404 int fr_ipid_newfrag(fin, ipid)
405 fr_info_t *fin;
406 u_32_t ipid;
407 {
408 	ipfr_t	*fra;
409 	ipf_stack_t *ifs = fin->fin_ifs;
410 
411 	if (ifs->ifs_fr_frag_lock)
412 		return 0;
413 
414 	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
415 	fra = ipfr_newfrag(fin, 0, ifs->ifs_ipfr_ipidtab);
416 	if (fra != NULL) {
417 		fra->ipfr_data = (void *)(uintptr_t)ipid;
418 		*ifs->ifs_ipfr_ipidtail = fra;
419 		fra->ipfr_prev = ifs->ifs_ipfr_ipidtail;
420 		ifs->ifs_ipfr_ipidtail = &fra->ipfr_next;
421 		fra->ipfr_next = NULL;
422 	}
423 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
424 	return fra ? 0 : -1;
425 }
426 
427 
/* ------------------------------------------------------------------------ */
/* Function:    fr_fraglookup                                               */
/* Returns:     ipfr_t * - pointer to ipfr_t structure if there's a         */
/*                         matching entry in the frag table, else NULL      */
/* Parameters:  fin(I)   - pointer to packet information                    */
/*              table(I) - pointer to fragment cache table to search        */
/*                                                                          */
/* Check the fragment cache to see if there is already a record of this     */
/* packet with its filter result known.                                     */
/*                                                                          */
/* Besides searching, a successful match updates the entry (seen0,          */
/* firstend, expected offset, ttl), may move it to the front of its hash    */
/* chain, and may set FI_BAD in fin->fin_flx for IPv6.                      */
/*                                                                          */
/* NOTE(review): the callers in this file enter the cache lock with         */
/* READ_ENTER before calling here, yet this function writes to the entry    */
/* and relinks the hash chain - confirm a read lock is sufficient.          */
/* ------------------------------------------------------------------------ */
static ipfr_t *fr_fraglookup(fin, table)
fr_info_t *fin;
ipfr_t *table[];
{
	ipfr_t *f, frag;
	u_int idx;
	ipf_stack_t *ifs = fin->fin_ifs;

	/* Only packets flagged as fragments and not flagged bad are tracked. */
	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
		return NULL;

	/*
	 * For fragments, we record protocol, packet id and both IP#'s
	 * (these should all be the same for all fragments of a packet),
	 * together with the interface and the option/security/auth masks.
	 * Note that no TOS value is filled in here.
	 *
	 * build up a hash value to index the table with.  This must stay
	 * in step with the computation in ipfr_newfrag().
	 */
	frag.ipfr_p = fin->fin_p;
	idx = fin->fin_p;
	frag.ipfr_id = fin->fin_id;
	idx += fin->fin_id;
	frag.ipfr_source = fin->fin_fi.fi_src;
	idx += frag.ipfr_src.s_addr;
	frag.ipfr_dest = fin->fin_fi.fi_dst;
	idx += frag.ipfr_dst.s_addr;
	frag.ipfr_ifp = fin->fin_ifp;
	idx *= 127;
	idx %= IPFT_SIZE;

	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
	frag.ipfr_auth = fin->fin_fi.fi_auth;

	/*
	 * check the table, careful to only compare the right amount of data
	 * (IPFR_CMPSZ bytes starting at ipfr_ifp).  Each "continue" below
	 * rejects the current entry and resumes the walk with the next
	 * entry on the same hash chain.
	 */
	for (f = table[idx]; f; f = f->ipfr_hnext)
		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
			  IPFR_CMPSZ)) {
			u_short	off;

			/*
			 * We don't want to let short packets match because
			 * they could be compromising the security of other
			 * rules that want to match on layer 4 fields (and
			 * can't because they have been fragmented off.)
			 * Why do this check here?  The counter acts as an
			 * indicator of this kind of attack, whereas if it was
			 * elsewhere, it wouldn't know if other matching
			 * packets had been seen.
			 */
			if (fin->fin_flx & FI_SHORT) {
				ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_short);
				continue;
			}

			/*
			 * XXX - We really need to be guarding against the
			 * retransmission of (src,dst,id,offset-range) here
			 * because a fragmented packet is never resent with
			 * the same IP ID# (or shouldn't).
			 *
			 * A second fragment with offset 0 is counted as a
			 * retransmission and not matched; the first one seen
			 * with offset 0 records where the first fragment
			 * ends.
			 */
			off = fin->fin_off; /* same as in ipfr_newfrag() */
			if (f->ipfr_seen0) {
				if (off == 0) {
					ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_retrans0);
					continue;
				}
			} else if (off == 0) {
				f->ipfr_seen0 = 1;
				f->ipfr_firstend = fin->fin_flen;
			}

			if (f != table[idx]) {
				ipfr_t **fp;

				/*
				 * Move fragment info. to the top of the list
				 * to speed up searches.  First, delink...
				 */
				fp = f->ipfr_hprev;
				(*fp) = f->ipfr_hnext;
				if (f->ipfr_hnext != NULL)
					f->ipfr_hnext->ipfr_hprev = fp;
				/*
				 * Then put back at the top of the chain.
				 */
				f->ipfr_hnext = table[idx];
				table[idx]->ipfr_hprev = &f->ipfr_hnext;
				f->ipfr_hprev = table + idx;
				table[idx] = f;
			}

			/*
			 * For IPv6, a later fragment that starts before the
			 * end of the first fragment marks the packet as bad.
			 */
			if (fin->fin_v == 6) {
				if (f->ipfr_seen0 && (off < f->ipfr_firstend))
					fin->fin_flx |= FI_BAD;
			}
			/*
			 * If we've followed the fragments, and this is the
			 * last (in order), shrink expiration time.  An
			 * out-of-order fragment is rejected when the entry
			 * was created with FR_FRSTRICT.
			 */
			if (off == f->ipfr_off) {
				if (!(fin->fin_ip->ip_off & IP_MF))
					f->ipfr_ttl = ifs->ifs_fr_ticks + 1;
				f->ipfr_off = fin->fin_dlen + off;
			} else if (f->ipfr_pass & FR_FRSTRICT)
				continue;
			ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_hits);
			return f;
		}
	return NULL;
}
550 
551 
552 /* ------------------------------------------------------------------------ */
553 /* Function:    fr_nat_knownfrag                                            */
554 /* Returns:     nat_t* - pointer to 'parent' NAT structure if frag table    */
555 /*                       match found, else NULL                             */
556 /* Parameters:  fin(I)  - pointer to packet information                     */
557 /*                                                                          */
558 /* Functional interface for NAT lookups of the NAT fragment cache           */
559 /* ------------------------------------------------------------------------ */
560 nat_t *fr_nat_knownfrag(fin)
561 fr_info_t *fin;
562 {
563 	nat_t	*nat;
564 	ipfr_t	*ipf;
565 	ipf_stack_t *ifs = fin->fin_ifs;
566 
567 	if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock) || !ifs->ifs_ipfr_natlist)
568 		return NULL;
569 	READ_ENTER(&ifs->ifs_ipf_natfrag);
570 	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_nattab);
571 	if (ipf != NULL) {
572 		nat = ipf->ipfr_data;
573 		/*
574 		 * This is the last fragment for this packet.
575 		 */
576 		if ((ipf->ipfr_ttl == ifs->ifs_fr_ticks + 1) && (nat != NULL)) {
577 			nat->nat_data = NULL;
578 			ipf->ipfr_data = NULL;
579 		}
580 	} else
581 		nat = NULL;
582 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
583 	return nat;
584 }
585 
586 
587 /* ------------------------------------------------------------------------ */
588 /* Function:    fr_ipid_knownfrag                                           */
589 /* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
590 /*                       return 0xfffffff to indicate no match.             */
591 /* Parameters:  fin(I) - pointer to packet information                      */
592 /*                                                                          */
593 /* Functional interface for IP ID lookups of the IP ID fragment cache       */
594 /* ------------------------------------------------------------------------ */
595 u_32_t fr_ipid_knownfrag(fin)
596 fr_info_t *fin;
597 {
598 	ipfr_t	*ipf;
599 	u_32_t	id;
600 	ipf_stack_t *ifs = fin->fin_ifs;
601 
602 	if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock) || !ifs->ifs_ipfr_ipidlist)
603 		return 0xffffffff;
604 
605 	READ_ENTER(&ifs->ifs_ipf_ipidfrag);
606 	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_ipidtab);
607 	if (ipf != NULL)
608 		id = (u_32_t)(uintptr_t)ipf->ipfr_data;
609 	else
610 		id = 0xffffffff;
611 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
612 	return id;
613 }
614 
615 
616 /* ------------------------------------------------------------------------ */
617 /* Function:    fr_knownfrag                                                */
618 /* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
619 /*                           the frag cache table, else NULL.               */
620 /* Parameters:  fin(I)   - pointer to packet information                    */
621 /*              passp(O) - pointer to where to store rule flags resturned   */
622 /*                                                                          */
623 /* Functional interface for normal lookups of the fragment cache.  If a     */
624 /* match is found, return the rule pointer and flags from the rule, except  */
625 /* that if FR_LOGFIRST is set, reset FR_LOG.                                */
626 /* ------------------------------------------------------------------------ */
627 frentry_t *fr_knownfrag(fin, passp)
628 fr_info_t *fin;
629 u_32_t *passp;
630 {
631 	frentry_t *fr = NULL;
632 	ipfr_t	*fra;
633 	u_32_t pass, oflx;
634 	ipf_stack_t *ifs = fin->fin_ifs;
635 
636 	if ((ifs->ifs_fr_frag_lock) || (ifs->ifs_ipfr_list == NULL))
637 		return NULL;
638 
639 	READ_ENTER(&ifs->ifs_ipf_frag);
640 	oflx = fin->fin_flx;
641 	fra = fr_fraglookup(fin, ifs->ifs_ipfr_heads);
642 	if (fra != NULL) {
643 		fr = fra->ipfr_rule;
644 		fin->fin_fr = fr;
645 		if (fr != NULL) {
646 			pass = fr->fr_flags;
647 			if ((pass & FR_LOGFIRST) != 0)
648 				pass &= ~(FR_LOGFIRST|FR_LOG);
649 			*passp = pass;
650 		}
651 	}
652 	if (!(oflx & FI_BAD) && (fin->fin_flx & FI_BAD)) {
653 		*passp &= ~FR_CMDMASK;
654 		*passp |= FR_BLOCK;
655 		fr = &ifs->ifs_frblock;
656 	}
657 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
658 	return fr;
659 }
660 
661 
662 /* ------------------------------------------------------------------------ */
663 /* Function:    fr_forget                                                   */
664 /* Returns:     Nil                                                         */
665 /* Parameters:  ptr(I) - pointer to data structure                          */
666 /*                                                                          */
667 /* Search through all of the fragment cache entries and wherever a pointer  */
668 /* is found to match ptr, reset it to NULL.                                 */
669 /* ------------------------------------------------------------------------ */
670 void fr_forget(ptr, ifs)
671 void *ptr;
672 ipf_stack_t *ifs;
673 {
674 	ipfr_t	*fr;
675 
676 	WRITE_ENTER(&ifs->ifs_ipf_frag);
677 	for (fr = ifs->ifs_ipfr_list; fr; fr = fr->ipfr_next)
678 		if (fr->ipfr_data == ptr)
679 			fr->ipfr_data = NULL;
680 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
681 }
682 
683 
684 /* ------------------------------------------------------------------------ */
685 /* Function:    fr_forgetnat                                                */
686 /* Returns:     Nil                                                         */
687 /* Parameters:  ptr(I) - pointer to data structure                          */
688 /*                                                                          */
689 /* Search through all of the fragment cache entries for NAT and wherever a  */
690 /* pointer  is found to match ptr, reset it to NULL.                        */
691 /* ------------------------------------------------------------------------ */
692 void fr_forgetnat(ptr, ifs)
693 void *ptr;
694 ipf_stack_t *ifs;
695 {
696 	ipfr_t	*fr;
697 
698 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
699 	for (fr = ifs->ifs_ipfr_natlist; fr; fr = fr->ipfr_next)
700 		if (fr->ipfr_data == ptr)
701 			fr->ipfr_data = NULL;
702 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
703 }
704 
705 
706 /* ------------------------------------------------------------------------ */
707 /* Function:    fr_fragdelete                                               */
708 /* Returns:     Nil                                                         */
709 /* Parameters:  fra(I)   - pointer to fragment structure to delete          */
710 /*              tail(IO) - pointer to the pointer to the tail of the frag   */
711 /*                         list                                             */
712 /*                                                                          */
713 /* Remove a fragment cache table entry from the table & list.  Also free    */
714 /* the filter rule it is associated with it if it is no longer used as a    */
715 /* result of decreasing the reference count.                                */
716 /* ------------------------------------------------------------------------ */
717 static void fr_fragdelete(fra, tail, ifs)
718 ipfr_t *fra, ***tail;
719 ipf_stack_t *ifs;
720 {
721 	frentry_t *fr;
722 
723 	fr = fra->ipfr_rule;
724 	if (fr != NULL)
725 	    (void)fr_derefrule(&fr, ifs);
726 
727 	if (fra->ipfr_next)
728 		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
729 	*fra->ipfr_prev = fra->ipfr_next;
730 	if (*tail == &fra->ipfr_next)
731 		*tail = fra->ipfr_prev;
732 
733 	if (fra->ipfr_hnext)
734 		fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev;
735 	*fra->ipfr_hprev = fra->ipfr_hnext;
736 
737 	if (fra->ipfr_ref <= 0)
738 		KFREE(fra);
739 }
740 
741 
742 /* ------------------------------------------------------------------------ */
743 /* Function:    fr_fragclear                                                */
744 /* Returns:     Nil                                                         */
745 /* Parameters:  Nil                                                         */
746 /*                                                                          */
747 /* Free memory in use by fragment state information kept.  Do the normal    */
748 /* fragment state stuff first and then the NAT-fragment table.              */
749 /* ------------------------------------------------------------------------ */
750 void fr_fragclear(ifs)
751 ipf_stack_t *ifs;
752 {
753 	ipfr_t	*fra;
754 	nat_t	*nat;
755 
756 	WRITE_ENTER(&ifs->ifs_ipf_frag);
757 	while ((fra = ifs->ifs_ipfr_list) != NULL) {
758 		fra->ipfr_ref--;
759 		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
760 	}
761 	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
762 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
763 
764 	WRITE_ENTER(&ifs->ifs_ipf_nat);
765 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
766 	while ((fra = ifs->ifs_ipfr_natlist) != NULL) {
767 		nat = fra->ipfr_data;
768 		if (nat != NULL) {
769 			if (nat->nat_data == fra)
770 				nat->nat_data = NULL;
771 		}
772 		fra->ipfr_ref--;
773 		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
774 	}
775 	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
776 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
777 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
778 }
779 
780 
781 /* ------------------------------------------------------------------------ */
782 /* Function:    fr_fragexpire                                               */
783 /* Returns:     Nil                                                         */
784 /* Parameters:  Nil                                                         */
785 /*                                                                          */
786 /* Expire entries in the fragment cache table that have been there too long */
787 /* ------------------------------------------------------------------------ */
788 void fr_fragexpire(ifs)
789 ipf_stack_t *ifs;
790 {
791 	ipfr_t	**fp, *fra;
792 	nat_t	*nat;
793 	SPL_INT(s);
794 
795 	if (ifs->ifs_fr_frag_lock)
796 		return;
797 
798 	SPL_NET(s);
799 	WRITE_ENTER(&ifs->ifs_ipf_frag);
800 	/*
801 	 * Go through the entire table, looking for entries to expire,
802 	 * which is indicated by the ttl being less than or equal to
803 	 * ifs_fr_ticks.
804 	 */
805 	for (fp = &ifs->ifs_ipfr_list; ((fra = *fp) != NULL); ) {
806 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
807 			break;
808 		fra->ipfr_ref--;
809 		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
810 		ifs->ifs_ipfr_stats.ifs_expire++;
811 		ifs->ifs_ipfr_inuse--;
812 	}
813 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
814 
815 	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
816 	for (fp = &ifs->ifs_ipfr_ipidlist; ((fra = *fp) != NULL); ) {
817 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
818 			break;
819 		fra->ipfr_ref--;
820 		fr_fragdelete(fra, &ifs->ifs_ipfr_ipidtail, ifs);
821 		ifs->ifs_ipfr_stats.ifs_expire++;
822 		ifs->ifs_ipfr_inuse--;
823 	}
824 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
825 
826 	/*
827 	 * Same again for the NAT table, except that if the structure also
828 	 * still points to a NAT structure, and the NAT structure points back
829 	 * at the one to be free'd, NULL the reference from the NAT struct.
830 	 * NOTE: We need to grab both mutex's early, and in this order so as
831 	 * to prevent a deadlock if both try to expire at the same time.
832 	 */
833 	WRITE_ENTER(&ifs->ifs_ipf_nat);
834 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
835 	for (fp = &ifs->ifs_ipfr_natlist; ((fra = *fp) != NULL); ) {
836 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
837 			break;
838 		nat = fra->ipfr_data;
839 		if (nat != NULL) {
840 			if (nat->nat_data == fra)
841 				nat->nat_data = NULL;
842 		}
843 		fra->ipfr_ref--;
844 		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
845 		ifs->ifs_ipfr_stats.ifs_expire++;
846 		ifs->ifs_ipfr_inuse--;
847 	}
848 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
849 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
850 	SPL_X(s);
851 }
852 
853 
854 /* ------------------------------------------------------------------------ */
855 /* Function:    fr_slowtimer                                                */
856 /* Returns:     Nil                                                         */
857 /* Parameters:  Nil                                                         */
858 /*                                                                          */
859 /* Slowly expire held state for fragments.  Timeouts are set * in           */
860 /* expectation of this being called twice per second.                       */
861 /* ------------------------------------------------------------------------ */
862 #if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \
863 			  !defined(__osf__) && !defined(linux))
864 # if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
865 void fr_slowtimer __P((void *arg))
866 # else
867 int fr_slowtimer(void *arg)
868 # endif
869 {
870 	ipf_stack_t *ifs = arg;
871 
872 	READ_ENTER(&ifs->ifs_ipf_global);
873 
874 	fr_fragexpire(ifs);
875 	fr_timeoutstate(ifs);
876 	fr_natexpire(ifs);
877 	fr_authexpire(ifs);
878 	ifs->ifs_fr_ticks++;
879 	if (ifs->ifs_fr_running <= 0)
880 		goto done;
881 # ifdef _KERNEL
882 #  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
883 	callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL);
884 #  else
885 #   if defined(__OpenBSD__)
886 	timeout_add(&fr_slowtimer_ch, hz/2);
887 #   else
888 #    if (__FreeBSD_version >= 300000)
889 	fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
890 #    else
891 #     ifdef linux
892 	;
893 #     else
894 	timeout(fr_slowtimer, NULL, hz/2);
895 #     endif
896 #    endif /* FreeBSD */
897 #   endif /* OpenBSD */
898 #  endif /* NetBSD */
899 # endif
900 done:
901 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
902 # if (BSD < 199103) || !defined(_KERNEL)
903 	return 0;
904 # endif
905 }
906 #endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */
907 
908 /*ARGSUSED*/
909 int fr_nextfrag(token, itp, top, tail, lock, ifs)
910 ipftoken_t *token;
911 ipfgeniter_t *itp;
912 ipfr_t **top, ***tail;
913 ipfrwlock_t *lock;
914 ipf_stack_t *ifs;
915 {
916 	ipfr_t *frag, *next, zero;
917 	int error = 0;
918 
919 	frag = token->ipt_data;
920 	if (frag == (ipfr_t *)-1) {
921 		ipf_freetoken(token, ifs);
922 		return ESRCH;
923 	}
924 
925 	READ_ENTER(lock);
926 	if (frag == NULL)
927 		next = *top;
928 	else
929 		next = frag->ipfr_next;
930 
931 	if (next != NULL) {
932 		ATOMIC_INC(next->ipfr_ref);
933 		token->ipt_data = next;
934 	} else {
935 		bzero(&zero, sizeof(zero));
936 		next = &zero;
937 		token->ipt_data = (void *)-1;
938 	}
939 	RWLOCK_EXIT(lock);
940 
941 	if (frag != NULL) {
942 		fr_fragderef(&frag, lock, ifs);
943 	}
944 
945 	error = COPYOUT(next, itp->igi_data, sizeof(*next));
946 	if (error != 0)
947 		error = EFAULT;
948 
949 	return error;
950 }
951 
952 
953 void fr_fragderef(frp, lock, ifs)
954 ipfr_t **frp;
955 ipfrwlock_t *lock;
956 ipf_stack_t *ifs;
957 {
958 	ipfr_t *fra;
959 
960 	fra = *frp;
961 	*frp = NULL;
962 
963 	WRITE_ENTER(lock);
964 	fra->ipfr_ref--;
965 	if (fra->ipfr_ref <= 0) {
966 		KFREE(fra);
967 		ifs->ifs_ipfr_stats.ifs_expire++;
968 		ifs->ifs_ipfr_inuse--;
969 	}
970 	RWLOCK_EXIT(lock);
971 }
972