xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision 0e01ff8b)
1 /*
2  * Copyright (C) 1995-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"$
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 # include "opt_ipfilter_log.h"
26 #endif
27 #if !defined(_KERNEL)
28 # include <stdio.h>
29 # include <string.h>
30 # include <stdlib.h>
31 # define _KERNEL
32 # ifdef __OpenBSD__
33 struct file;
34 # endif
35 # include <sys/uio.h>
36 # undef _KERNEL
37 #endif
38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
39 # include <sys/filio.h>
40 # include <sys/fcntl.h>
41 #else
42 # include <sys/ioctl.h>
43 #endif
44 #if !defined(AIX)
45 # include <sys/fcntl.h>
46 #endif
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 #  include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 #  include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66 #if __FreeBSD_version >= 300000
67 # include <sys/queue.h>
68 #endif
69 #include <net/if.h>
70 #if __FreeBSD_version >= 300000
71 # include <net/if_var.h>
72 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
73 #  include "opt_ipfilter.h"
74 # endif
75 #endif
76 #ifdef sun
77 # include <net/af.h>
78 #endif
79 #include <net/route.h>
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 
84 #ifdef RFC1825
85 # include <vpn/md5.h>
86 # include <vpn/ipsec.h>
87 extern struct ifnet vpnif;
88 #endif
89 
90 #if !defined(linux)
91 # include <netinet/ip_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_nat.h"
100 #include "netinet/ip_frag.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ip_proxy.h"
103 #include "netinet/ipf_stack.h"
104 #ifdef	IPFILTER_SYNC
105 #include "netinet/ip_sync.h"
106 #endif
107 #if (__FreeBSD_version >= 300000)
108 # include <sys/malloc.h>
109 #endif
110 /* END OF INCLUDES */
111 
/*
 * Make SOCKADDR_IN name the IPv4 socket address structure in this file,
 * discarding any definition that an earlier header may have supplied.
 */
#undef	SOCKADDR_IN
#define	SOCKADDR_IN	struct sockaddr_in

/* Source identification strings; compiled out when running under lint. */
#if !defined(lint)
static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
#endif
119 
120 
121 /* ======================================================================== */
122 /* How the NAT is organised and works.                                      */
123 /*                                                                          */
124 /* Inside (interface y) NAT       Outside (interface x)                     */
125 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
127 /* ------------>         |   ------------>                                  */
128 /* src=10.1.1.1          |   src=192.1.1.1                                  */
129 /*                       |                                                  */
130 /*                       |   in, processed by fr_checknatin() for x         */
131 /* <------------         |   <------------                                  */
132 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
133 /* -------------------- -+- -------------------------------------           */
134 /* fr_checknatout() - changes ip_src and if required, sport                 */
135 /*             - creates a new mapping, if required.                        */
136 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
137 /*                                                                          */
138 /* In the NAT table, internal source is recorded as "in" and externally     */
139 /* seen as "out".                                                           */
140 /* ======================================================================== */
141 
142 
/*
 * Forward declarations for the helpers that are private to this file.
 * The parameter lists are wrapped in __P(()), which is defined outside this
 * file (presumably so that pre-ANSI compilers can drop them - confirm in
 * ip_compat.h).
 */
static	int	nat_flushtable __P((ipf_stack_t *));
static	int	nat_clearlist __P((ipf_stack_t *));
static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
static	void	nat_delrdr __P((struct ipnat *));
static	void	nat_delnat __P((struct ipnat *));
static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
static	int	nat_match __P((fr_info_t *, ipnat_t *));
static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
				    struct in_addr, struct in_addr, u_32_t,
				    ipf_stack_t *));
static	void	nat_hostmapdel __P((struct hostmap *));
static	INLINE	int nat_icmpquerytype4 __P((int));
static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int,
				    ipf_stack_t *));
static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int,
				    ipf_stack_t *));
static	INLINE	int nat_icmperrortype4 __P((int));
static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
				      tcphdr_t *, nat_t **, int));
static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
static	INLINE	int nat_wildok __P((nat_t *, int, int, int, int));
static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
static	int	nat_extraflush __P((int, ipf_stack_t *));
static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
static	int	nat_flushclosing __P((int, ipf_stack_t *));
178 
179 
/*
 * Below we declare a list of constants used only in the nat_extraflush()
 * routine.  We are placing it here, instead of in nat_extraflush() itself,
 * because we want to make it visible to tools such as mdb, nm etc., so the
 * values can easily be altered during debugging.
 *
 * The table holds four idle-time thresholds in ascending order.  Each entry
 * is written in seconds and passed through IPF_TTLVAL(), which is defined
 * outside this file (presumably converting seconds into the timer units used
 * by the timeout queues - confirm).
 */
static	const int	idletime_tab[] = {
	IPF_TTLVAL(30),		/* 30 seconds */
	IPF_TTLVAL(1800),	/* 30 minutes */
	IPF_TTLVAL(43200),	/* 12 hours */
	IPF_TTLVAL(345600),	/* 4 days */
};
192 
193 
194 /* ------------------------------------------------------------------------ */
195 /* Function:    fr_natinit                                                  */
196 /* Returns:     int - 0 == success, -1 == failure                           */
197 /* Parameters:  Nil                                                         */
198 /*                                                                          */
199 /* Initialise all of the NAT locks, tables and other structures.            */
200 /* ------------------------------------------------------------------------ */
201 int fr_natinit(ifs)
202 ipf_stack_t *ifs;
203 {
204 	int i;
205 
206 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
207 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
208 	if (ifs->ifs_nat_table[0] != NULL)
209 		bzero((char *)ifs->ifs_nat_table[0],
210 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
211 	else
212 		return -1;
213 
214 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
215 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
216 	if (ifs->ifs_nat_table[1] != NULL)
217 		bzero((char *)ifs->ifs_nat_table[1],
218 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
219 	else
220 		return -2;
221 
222 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
223 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
224 	if (ifs->ifs_nat_rules != NULL)
225 		bzero((char *)ifs->ifs_nat_rules,
226 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
227 	else
228 		return -3;
229 
230 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
231 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
232 	if (ifs->ifs_rdr_rules != NULL)
233 		bzero((char *)ifs->ifs_rdr_rules,
234 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
235 	else
236 		return -4;
237 
238 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
239 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
240 	if (ifs->ifs_maptable != NULL)
241 		bzero((char *)ifs->ifs_maptable,
242 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
243 	else
244 		return -5;
245 
246 	ifs->ifs_ipf_hm_maplist = NULL;
247 
248 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
249 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
250 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
251 		return -1;
252 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
253 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
254 
255 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
256 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
257 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
258 		return -1;
259 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
260 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
261 
262 	if (ifs->ifs_fr_nat_maxbucket == 0) {
263 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
264 			ifs->ifs_fr_nat_maxbucket++;
265 		ifs->ifs_fr_nat_maxbucket *= 2;
266 	}
267 
268 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
269 	/*
270 	 * Increase this because we may have "keep state" following this too
271 	 * and packet storms can occur if this is removed too quickly.
272 	 */
273 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
274 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
275 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
276 	ifs->ifs_nat_udptq.ifq_ref = 1;
277 	ifs->ifs_nat_udptq.ifq_head = NULL;
278 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
279 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
280 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
281 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
282 	ifs->ifs_nat_icmptq.ifq_ref = 1;
283 	ifs->ifs_nat_icmptq.ifq_head = NULL;
284 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
285 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
286 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
287 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
288 	ifs->ifs_nat_iptq.ifq_ref = 1;
289 	ifs->ifs_nat_iptq.ifq_head = NULL;
290 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
291 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
292 	ifs->ifs_nat_iptq.ifq_next = NULL;
293 
294 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
295 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
296 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
297 #ifdef LARGE_NAT
298 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
299 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
300 #endif
301 	}
302 
303 	/*
304 	 * Increase this because we may have "keep state" following
305 	 * this too and packet storms can occur if this is removed
306 	 * too quickly.
307 	 */
308 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
309 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
310 
311 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
312 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
313 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
314 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
315 
316 	ifs->ifs_fr_nat_init = 1;
317 
318 	return 0;
319 }
320 
321 
322 /* ------------------------------------------------------------------------ */
323 /* Function:    nat_addrdr                                                  */
324 /* Returns:     Nil                                                         */
325 /* Parameters:  n(I) - pointer to NAT rule to add                           */
326 /*                                                                          */
327 /* Adds a redirect rule to the hash table of redirect rules and the list of */
328 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
329 /* use by redirect rules.                                                   */
330 /* ------------------------------------------------------------------------ */
331 static void nat_addrdr(n, ifs)
332 ipnat_t *n;
333 ipf_stack_t *ifs;
334 {
335 	ipnat_t **np;
336 	u_32_t j;
337 	u_int hv;
338 	int k;
339 
340 	k = count4bits(n->in_outmsk);
341 	if ((k >= 0) && (k != 32))
342 		ifs->ifs_rdr_masks |= 1 << k;
343 	j = (n->in_outip & n->in_outmsk);
344 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
345 	np = ifs->ifs_rdr_rules + hv;
346 	while (*np != NULL)
347 		np = &(*np)->in_rnext;
348 	n->in_rnext = NULL;
349 	n->in_prnext = np;
350 	n->in_hv = hv;
351 	*np = n;
352 }
353 
354 
355 /* ------------------------------------------------------------------------ */
356 /* Function:    nat_addnat                                                  */
357 /* Returns:     Nil                                                         */
358 /* Parameters:  n(I) - pointer to NAT rule to add                           */
359 /*                                                                          */
360 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
361 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
362 /* redirect rules.                                                          */
363 /* ------------------------------------------------------------------------ */
364 static void nat_addnat(n, ifs)
365 ipnat_t *n;
366 ipf_stack_t *ifs;
367 {
368 	ipnat_t **np;
369 	u_32_t j;
370 	u_int hv;
371 	int k;
372 
373 	k = count4bits(n->in_inmsk);
374 	if ((k >= 0) && (k != 32))
375 		ifs->ifs_nat_masks |= 1 << k;
376 	j = (n->in_inip & n->in_inmsk);
377 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
378 	np = ifs->ifs_nat_rules + hv;
379 	while (*np != NULL)
380 		np = &(*np)->in_mnext;
381 	n->in_mnext = NULL;
382 	n->in_pmnext = np;
383 	n->in_hv = hv;
384 	*np = n;
385 }
386 
387 
388 /* ------------------------------------------------------------------------ */
389 /* Function:    nat_delrdr                                                  */
390 /* Returns:     Nil                                                         */
391 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
392 /*                                                                          */
393 /* Removes a redirect rule from the hash table of redirect rules.           */
394 /* ------------------------------------------------------------------------ */
395 static void nat_delrdr(n)
396 ipnat_t *n;
397 {
398 	if (n->in_rnext)
399 		n->in_rnext->in_prnext = n->in_prnext;
400 	*n->in_prnext = n->in_rnext;
401 }
402 
403 
404 /* ------------------------------------------------------------------------ */
405 /* Function:    nat_delnat                                                  */
406 /* Returns:     Nil                                                         */
407 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
408 /*                                                                          */
409 /* Removes a NAT map rule from the hash table of NAT map rules.             */
410 /* ------------------------------------------------------------------------ */
411 static void nat_delnat(n)
412 ipnat_t *n;
413 {
414 	if (n->in_mnext != NULL)
415 		n->in_mnext->in_pmnext = n->in_pmnext;
416 	*n->in_pmnext = n->in_mnext;
417 }
418 
419 
420 /* ------------------------------------------------------------------------ */
421 /* Function:    nat_hostmap                                                 */
422 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
423 /*                                else a pointer to the hostmapping to use  */
424 /* Parameters:  np(I)   - pointer to NAT rule                               */
425 /*              real(I) - real IP address                                   */
426 /*              map(I)  - mapped IP address                                 */
427 /*              port(I) - destination port number                           */
428 /* Write Locks: ipf_nat                                                     */
429 /*                                                                          */
430 /* Check if an ip address has already been allocated for a given mapping    */
431 /* that is not doing port based translation.  If is not yet allocated, then */
432 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
433 /* ------------------------------------------------------------------------ */
434 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
435 ipnat_t *np;
436 struct in_addr src;
437 struct in_addr dst;
438 struct in_addr map;
439 u_32_t port;
440 ipf_stack_t *ifs;
441 {
442 	hostmap_t *hm;
443 	u_int hv;
444 
445 	hv = (src.s_addr ^ dst.s_addr);
446 	hv += src.s_addr;
447 	hv += dst.s_addr;
448 	hv %= HOSTMAP_SIZE;
449 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
450 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
451 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
452 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
453 		    ((port == 0) || (port == hm->hm_port))) {
454 			hm->hm_ref++;
455 			return hm;
456 		}
457 
458 	if (np == NULL)
459 		return NULL;
460 
461 	KMALLOC(hm, hostmap_t *);
462 	if (hm) {
463 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
464 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
465 		if (ifs->ifs_ipf_hm_maplist != NULL)
466 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
467 		ifs->ifs_ipf_hm_maplist = hm;
468 
469 		hm->hm_next = ifs->ifs_maptable[hv];
470 		hm->hm_pnext = ifs->ifs_maptable + hv;
471 		if (ifs->ifs_maptable[hv] != NULL)
472 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
473 		ifs->ifs_maptable[hv] = hm;
474 		hm->hm_ipnat = np;
475 		hm->hm_srcip = src;
476 		hm->hm_dstip = dst;
477 		hm->hm_mapip = map;
478 		hm->hm_ref = 1;
479 		hm->hm_port = port;
480 	}
481 	return hm;
482 }
483 
484 
485 /* ------------------------------------------------------------------------ */
486 /* Function:    nat_hostmapdel                                              */
487 /* Returns:     Nil                                                         */
488 /* Parameters:  hm(I) - pointer to hostmap structure                        */
489 /* Write Locks: ipf_nat                                                     */
490 /*                                                                          */
491 /* Decrement the references to this hostmap structure by one.  If this      */
492 /* reaches zero then remove it and free it.                                 */
493 /* ------------------------------------------------------------------------ */
494 static void nat_hostmapdel(hm)
495 struct hostmap *hm;
496 {
497 	hm->hm_ref--;
498 	if (hm->hm_ref == 0) {
499 		if (hm->hm_next)
500 			hm->hm_next->hm_pnext = hm->hm_pnext;
501 		*hm->hm_pnext = hm->hm_next;
502 		if (hm->hm_hnext)
503 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
504 		*hm->hm_phnext = hm->hm_hnext;
505 		KFREE(hm);
506 	}
507 }
508 
509 void fr_hostmapderef(hmp)
510 struct hostmap **hmp;
511 {
512 	struct hostmap *hm;
513 
514 	hm = *hmp;
515 	*hmp = NULL;
516 	hm->hm_ref--;
517 	if (hm->hm_ref == 0)
518 		nat_hostmapdel(hm);
519 }
520 
521 
522 /* ------------------------------------------------------------------------ */
523 /* Function:    fix_outcksum                                                */
524 /* Returns:     Nil                                                         */
525 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
526 /*              n((I)  - amount to adjust checksum by                       */
527 /*                                                                          */
528 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
529 /* ------------------------------------------------------------------------ */
530 void fix_outcksum(sp, n)
531 u_short *sp;
532 u_32_t n;
533 {
534 	u_short sumshort;
535 	u_32_t sum1;
536 
537 	if (n == 0)
538 		return;
539 
540 	sum1 = (~ntohs(*sp)) & 0xffff;
541 	sum1 += (n);
542 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
543 	/* Again */
544 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
545 	sumshort = ~(u_short)sum1;
546 	*(sp) = htons(sumshort);
547 }
548 
549 
550 /* ------------------------------------------------------------------------ */
551 /* Function:    fix_incksum                                                 */
552 /* Returns:     Nil                                                         */
553 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
554 /*              n((I)  - amount to adjust checksum by                       */
555 /*                                                                          */
556 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
557 /* ------------------------------------------------------------------------ */
558 void fix_incksum(sp, n)
559 u_short *sp;
560 u_32_t n;
561 {
562 	u_short sumshort;
563 	u_32_t sum1;
564 
565 	if (n == 0)
566 		return;
567 
568 	sum1 = (~ntohs(*sp)) & 0xffff;
569 	sum1 += ~(n) & 0xffff;
570 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
571 	/* Again */
572 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
573 	sumshort = ~(u_short)sum1;
574 	*(sp) = htons(sumshort);
575 }
576 
577 
578 /* ------------------------------------------------------------------------ */
579 /* Function:    fix_datacksum                                               */
580 /* Returns:     Nil                                                         */
581 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
582 /*              n((I)  - amount to adjust checksum by                       */
583 /*                                                                          */
584 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
585 /* data section of an IP packet.                                            */
586 /*                                                                          */
587 /* The only situation in which you need to do this is when NAT'ing an       */
588 /* ICMP error message. Such a message, contains in its body the IP header   */
589 /* of the original IP packet, that causes the error.                        */
590 /*                                                                          */
591 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
592 /* kernel the data section of the ICMP error is just data, and no special   */
593 /* processing like hardware cksum or ntohs processing have been done by the */
594 /* kernel on the data section.                                              */
595 /* ------------------------------------------------------------------------ */
596 void fix_datacksum(sp, n)
597 u_short *sp;
598 u_32_t n;
599 {
600 	u_short sumshort;
601 	u_32_t sum1;
602 
603 	if (n == 0)
604 		return;
605 
606 	sum1 = (~ntohs(*sp)) & 0xffff;
607 	sum1 += (n);
608 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
609 	/* Again */
610 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
611 	sumshort = ~(u_short)sum1;
612 	*(sp) = htons(sumshort);
613 }
614 
615 
/* ------------------------------------------------------------------------ */
/* Function:    fr_nat_ioctl                                                */
/* Returns:     int - 0 == success, != 0 == failure                         */
/* Parameters:  data(I) - pointer to ioctl data                             */
/*              cmd(I)  - ioctl command integer                             */
/*              mode(I) - file mode bits used with open; also carries the   */
/*                        NAT_LOCKHELD and NAT_SYSSPACE flags tested below  */
/*              uid(I)  - passed on to ipf_findtoken() and ipf_deltoken()   */
/*              ctx(I)  - passed on to ipf_findtoken() and ipf_deltoken()   */
/*              ifs(I)  - pointer to the IP Filter stack instance           */
/*                                                                          */
/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
/*                                                                          */
/* For SIOCADNAT and SIOCRMNAT the rule is copied in, normalised and looked */
/* up in ifs_nat_list with the ipf_natio mutex held; the matching case in   */
/* the switch releases that mutex on each of its paths.  For SIOCADNAT a    */
/* new rule structure is allocated up front and freed at "done" unless      */
/* nat_siocaddnat() succeeded with it.                                      */
/* ------------------------------------------------------------------------ */
int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
ioctlcmd_t cmd;
caddr_t data;
int mode, uid;
void *ctx;
ipf_stack_t *ifs;
{
	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
	int error = 0, ret, arg, getlock;
	ipnat_t natd;

#if (BSD >= 199306) && defined(_KERNEL)
	if ((securelevel >= 2) && (mode & FWRITE))
		return EPERM;
#endif

	/*
	 * getlock says whether this function must take the ipf_nat lock
	 * itself; it is 0 when the caller flagged NAT_LOCKHELD in mode.
	 */
#if defined(__osf__) && defined(_KERNEL)
	getlock = 0;
#else
	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
#endif

	nat = NULL;     /* XXX gcc -Wuninitialized */
	/*
	 * Only an add needs storage for a new rule.  A failed allocation is
	 * reported as ENOMEM later, in the SIOCADNAT case.
	 */
	if (cmd == (ioctlcmd_t)SIOCADNAT) {
		KMALLOC(nt, ipnat_t *);
	} else {
		nt = NULL;
	}

	/*
	 * Fetch the command's argument: a NAT rule into natd for add/remove
	 * (straight bcopy when NAT_SYSSPACE is set, fr_inobj() otherwise),
	 * or an int into arg for a flush.
	 */
	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
		if (mode & NAT_SYSSPACE) {
			bcopy(data, (char *)&natd, sizeof(natd));
			error = 0;
		} else {
			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
		}

	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
		/*
		 * NOTE(review): the outcome of this copy is not checked here,
		 * so arg may be used below without having been set.
		 */
		BCOPYIN(data, &arg, sizeof(arg));
	}

	/* The ipf_natio mutex has not been taken yet on this path. */
	if (error != 0)
		goto done;

	/*
	 * For add/delete, look to see if the NAT entry is already present
	 */
	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
		nat = &natd;
		if (nat->in_v == 0)	/* For backward compat. */
			nat->in_v = 4;
		/* Keep only the flag bits in IPN_USERFLAGS. */
		nat->in_flags &= IPN_USERFLAGS;
		if ((nat->in_redir & NAT_MAPBLK) == 0) {
			if ((nat->in_flags & IPN_SPLIT) == 0)
				nat->in_inip &= nat->in_inmsk;
			if ((nat->in_flags & IPN_IPRANGE) == 0)
				nat->in_outip &= nat->in_outmsk;
		}
		/*
		 * Held from here until the SIOCADNAT/SIOCRMNAT case below
		 * releases it.  After the loop, n is the matching rule (or
		 * NULL) and np the link that points at it (or the tail link).
		 */
		MUTEX_ENTER(&ifs->ifs_ipf_natio);
		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
		     np = &n->in_next)
			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
					IPN_CMPSIZ))
				break;
	}

	switch (cmd)
	{
	case SIOCGENITER :
	    {
		ipfgeniter_t iter;
		ipftoken_t *token;

		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
		if (error != 0)
			break;

		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
		if (token != NULL)
			error  = nat_iterator(token, &iter, ifs);
		else
			error = ESRCH;
		/*
		 * NOTE(review): no matching enter of ifs_ipf_tokens is
		 * visible in this function - presumably ipf_findtoken()
		 * returns with it held; confirm.
		 */
		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
		break;
	    }
#ifdef  IPFILTER_LOG
	case SIOCIPFFB :
	{
		int tmp;

		if (!(mode & FWRITE))
			error = EPERM;
		else {
			/* Hand back whatever ipflog_clear() returned. */
			tmp = ipflog_clear(IPL_LOGNAT, ifs);
			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
		}
		break;
	}
	case SIOCSETLG :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			BCOPYIN((char *)data,
				       (char *)&ifs->ifs_nat_logging,
				sizeof(ifs->ifs_nat_logging));
		}
		break;
	case SIOCGETLG :
		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
			sizeof(ifs->ifs_nat_logging));
		break;
	case FIONREAD :
		arg = ifs->ifs_iplused[IPL_LOGNAT];
		BCOPYOUT(&arg, data, sizeof(arg));
		break;
#endif
	case SIOCADNAT :
		/* Needs write access, no existing match and a new rule. */
		if (!(mode & FWRITE)) {
			error = EPERM;
		} else if (n != NULL) {
			error = EEXIST;
		} else if (nt == NULL) {
			error = ENOMEM;
		}
		if (error != 0) {
			MUTEX_EXIT(&ifs->ifs_ipf_natio);
			break;
		}
		/* sizeof(*n) is the size of an ipnat_t; n is not read. */
		bcopy((char *)nat, (char *)nt, sizeof(*n));
		error = nat_siocaddnat(nt, np, getlock, ifs);
		MUTEX_EXIT(&ifs->ifs_ipf_natio);
		/* On success nt must not be freed at "done". */
		if (error == 0)
			nt = NULL;
		break;
	case SIOCRMNAT :
		/* Needs write access and an existing matching rule. */
		if (!(mode & FWRITE)) {
			error = EPERM;
			n = NULL;
		} else if (n == NULL) {
			error = ESRCH;
		}

		if (error != 0) {
			MUTEX_EXIT(&ifs->ifs_ipf_natio);
			break;
		}
		nat_siocdelnat(n, np, getlock, ifs);

		MUTEX_EXIT(&ifs->ifs_ipf_natio);
		n = NULL;
		break;
	case SIOCGNATS :
		/* Refresh the fields of the stats block set here, then export it. */
		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
		break;
	case SIOCGNATL :
	    {
		natlookup_t nl;

		/* The lookup request is copied in and out under the read lock. */
		if (getlock) {
			READ_ENTER(&ifs->ifs_ipf_nat);
		}
		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
		if (error == 0) {
			if (nat_lookupredir(&nl, ifs) != NULL) {
				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
			} else {
				error = ESRCH;
			}
		}
		if (getlock) {
			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		}
		break;
	    }
	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
		if (!(mode & FWRITE)) {
			error = EPERM;
			break;
		}
		if (getlock) {
			WRITE_ENTER(&ifs->ifs_ipf_nat);
		}
		/*
		 * arg selects the operation: 0 calls nat_flushtable(), 1
		 * calls nat_clearlist(), 2..4 call nat_extraflush() with
		 * arg - 2; anything else is rejected.  ret is only set, and
		 * only copied out, when one of those calls was made.
		 */
		error = 0;
		if (arg == 0)
			ret = nat_flushtable(ifs);
		else if (arg == 1)
			ret = nat_clearlist(ifs);
		else if (arg >= 2 && arg <= 4)
			ret = nat_extraflush(arg - 2, ifs);
		else
			error = EINVAL;
		if (getlock) {
			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		}
		if (error == 0) {
			BCOPYOUT(&ret, data, sizeof(ret));
		}
		break;
	case SIOCPROXY :
		error = appr_ioctl(data, cmd, mode, ifs);
		break;
	case SIOCSTLCK :
		if (!(mode & FWRITE)) {
			error = EPERM;
		} else {
			fr_lock(data, &ifs->ifs_fr_nat_lock);
		}
		break;
	case SIOCSTPUT :
		/*
		 * NOTE(review): lack of write access yields EACCES here,
		 * whereas the other write-protected commands use EPERM.
		 */
		if ((mode & FWRITE) != 0) {
			error = fr_natputent(data, getlock, ifs);
		} else {
			error = EACCES;
		}
		break;
	case SIOCSTGSZ :
		/* Only permitted while ifs_fr_nat_lock is set. */
		if (ifs->ifs_fr_nat_lock) {
			if (getlock) {
				READ_ENTER(&ifs->ifs_ipf_nat);
			}
			error = fr_natgetsz(data, ifs);
			if (getlock) {
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
		} else
			error = EACCES;
		break;
	case SIOCSTGET :
		/* Only permitted while ifs_fr_nat_lock is set. */
		if (ifs->ifs_fr_nat_lock) {
			if (getlock) {
				READ_ENTER(&ifs->ifs_ipf_nat);
			}
			error = fr_natgetent(data, ifs);
			if (getlock) {
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
		} else
			error = EACCES;
		break;
	case SIOCIPFDELTOK :
		/* NOTE(review): the outcome of this copy is discarded. */
		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
		error = ipf_deltoken(arg, uid, ctx, ifs);
		break;
	default :
		error = EINVAL;
		break;
	}
done:
	/* Non-NULL only when an add allocated a rule that was not installed. */
	if (nt)
		KFREE(nt);
	return error;
}
889 
890 
891 /* ------------------------------------------------------------------------ */
892 /* Function:    nat_siocaddnat                                              */
893 /* Returns:     int - 0 == success, != 0 == failure                         */
894 /* Parameters:  n(I)       - pointer to new NAT rule                        */
895 /*              np(I)      - pointer to where to insert new NAT rule        */
896 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
897 /* Mutex Locks: ipf_natio                                                   */
898 /*                                                                          */
899 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
900 /* from information passed to the kernel, then add it  to the appropriate   */
901 /* NAT rule table(s).                                                       */
902 /* ------------------------------------------------------------------------ */
903 static int nat_siocaddnat(n, np, getlock, ifs)
904 ipnat_t *n, **np;
905 int getlock;
906 ipf_stack_t *ifs;
907 {
908 	int error = 0, i, j;
909 
910 	if (nat_resolverule(n, ifs) != 0)
911 		return ENOENT;
912 
913 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
914 		return EINVAL;
915 
916 	n->in_use = 0;
917 	if (n->in_redir & NAT_MAPBLK)
918 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
919 	else if (n->in_flags & IPN_AUTOPORTMAP)
920 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
921 	else if (n->in_flags & IPN_IPRANGE)
922 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
923 	else if (n->in_flags & IPN_SPLIT)
924 		n->in_space = 2;
925 	else if (n->in_outmsk != 0)
926 		n->in_space = ~ntohl(n->in_outmsk);
927 	else
928 		n->in_space = 1;
929 
930 	/*
931 	 * Calculate the number of valid IP addresses in the output
932 	 * mapping range.  In all cases, the range is inclusive of
933 	 * the start and ending IP addresses.
934 	 * If to a CIDR address, lose 2: broadcast + network address
935 	 *                               (so subtract 1)
936 	 * If to a range, add one.
937 	 * If to a single IP address, set to 1.
938 	 */
939 	if (n->in_space) {
940 		if ((n->in_flags & IPN_IPRANGE) != 0)
941 			n->in_space += 1;
942 		else
943 			n->in_space -= 1;
944 	} else
945 		n->in_space = 1;
946 
947 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
948 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
949 		n->in_nip = ntohl(n->in_outip) + 1;
950 	else if ((n->in_flags & IPN_SPLIT) &&
951 		 (n->in_redir & NAT_REDIRECT))
952 		n->in_nip = ntohl(n->in_inip);
953 	else
954 		n->in_nip = ntohl(n->in_outip);
955 	if (n->in_redir & NAT_MAP) {
956 		n->in_pnext = ntohs(n->in_pmin);
957 		/*
958 		 * Multiply by the number of ports made available.
959 		 */
960 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
961 			n->in_space *= (ntohs(n->in_pmax) -
962 					ntohs(n->in_pmin) + 1);
963 			/*
964 			 * Because two different sources can map to
965 			 * different destinations but use the same
966 			 * local IP#/port #.
967 			 * If the result is smaller than in_space, then
968 			 * we may have wrapped around 32bits.
969 			 */
970 			i = n->in_inmsk;
971 			if ((i != 0) && (i != 0xffffffff)) {
972 				j = n->in_space * (~ntohl(i) + 1);
973 				if (j >= n->in_space)
974 					n->in_space = j;
975 				else
976 					n->in_space = 0xffffffff;
977 			}
978 		}
979 		/*
980 		 * If no protocol is specified, multiple by 256 to allow for
981 		 * at least one IP:IP mapping per protocol.
982 		 */
983 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
984 				j = n->in_space * 256;
985 				if (j >= n->in_space)
986 					n->in_space = j;
987 				else
988 					n->in_space = 0xffffffff;
989 		}
990 	}
991 
992 	/* Otherwise, these fields are preset */
993 
994 	if (getlock) {
995 		WRITE_ENTER(&ifs->ifs_ipf_nat);
996 	}
997 	n->in_next = NULL;
998 	*np = n;
999 
1000 	if (n->in_age[0] != 0)
1001 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1002 						  n->in_age[0], ifs);
1003 
1004 	if (n->in_age[1] != 0)
1005 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1006 						  n->in_age[1], ifs);
1007 
1008 	if (n->in_redir & NAT_REDIRECT) {
1009 		n->in_flags &= ~IPN_NOTDST;
1010 		nat_addrdr(n, ifs);
1011 	}
1012 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1013 		n->in_flags &= ~IPN_NOTSRC;
1014 		nat_addnat(n, ifs);
1015 	}
1016 	n = NULL;
1017 	ifs->ifs_nat_stats.ns_rules++;
1018 	if (getlock) {
1019 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1020 	}
1021 
1022 	return error;
1023 }
1024 
1025 
1026 /* ------------------------------------------------------------------------ */
1027 /* Function:    nat_resolvrule                                              */
1028 /* Returns:     int - 0 == success, -1 == failure                           */
1029 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1030 /*                                                                          */
1031 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1032 /* any specified interfaces and proxy labels, and determines whether or not */
1033 /* all proxy labels are correctly specified.				    */
1034 /*									    */
1035 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1036 /* ------------------------------------------------------------------------ */
1037 static int nat_resolverule(n, ifs)
1038 ipnat_t *n;
1039 ipf_stack_t *ifs;
1040 {
1041 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1042 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs);
1043 
1044 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1045 	if (n->in_ifnames[1][0] == '\0') {
1046 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1047 		n->in_ifps[1] = n->in_ifps[0];
1048 	} else {
1049 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs);
1050 	}
1051 
1052 	if (n->in_plabel[0] != '\0') {
1053 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1054 		if (n->in_apr == NULL)
1055 			return -1;
1056 	}
1057 	return 0;
1058 }
1059 
1060 
1061 /* ------------------------------------------------------------------------ */
1062 /* Function:    nat_siocdelnat                                              */
1063 /* Returns:     int - 0 == success, != 0 == failure                         */
1064 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1065 /*              np(I)      - pointer to where to insert new NAT rule        */
1066 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1067 /* Mutex Locks: ipf_natio                                                   */
1068 /*                                                                          */
1069 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1070 /* from information passed to the kernel, then add it  to the appropriate   */
1071 /* NAT rule table(s).                                                       */
1072 /* ------------------------------------------------------------------------ */
1073 static void nat_siocdelnat(n, np, getlock, ifs)
1074 ipnat_t *n, **np;
1075 int getlock;
1076 ipf_stack_t *ifs;
1077 {
1078 	if (getlock) {
1079 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1080 	}
1081 	if (n->in_redir & NAT_REDIRECT)
1082 		nat_delrdr(n);
1083 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1084 		nat_delnat(n);
1085 	if (ifs->ifs_nat_list == NULL) {
1086 		ifs->ifs_nat_masks = 0;
1087 		ifs->ifs_rdr_masks = 0;
1088 	}
1089 
1090 	if (n->in_tqehead[0] != NULL) {
1091 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1092 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1093 		}
1094 	}
1095 
1096 	if (n->in_tqehead[1] != NULL) {
1097 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1098 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1099 		}
1100 	}
1101 
1102 	*np = n->in_next;
1103 
1104 	if (n->in_use == 0) {
1105 		if (n->in_apr)
1106 			appr_free(n->in_apr);
1107 		KFREE(n);
1108 		ifs->ifs_nat_stats.ns_rules--;
1109 	} else {
1110 		n->in_flags |= IPN_DELETE;
1111 		n->in_next = NULL;
1112 	}
1113 	if (getlock) {
1114 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1115 	}
1116 }
1117 
1118 
1119 /* ------------------------------------------------------------------------ */
1120 /* Function:    fr_natgetsz                                                 */
1121 /* Returns:     int - 0 == success, != 0 is the error value.                */
1122 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1123 /*                        get the size of.                                  */
1124 /*                                                                          */
1125 /* Handle SIOCSTGSZ.                                                        */
1126 /* Return the size of the nat list entry to be copied back to user space.   */
1127 /* The size of the entry is stored in the ng_sz field and the enture natget */
1128 /* structure is copied back to the user.                                    */
1129 /* ------------------------------------------------------------------------ */
1130 static int fr_natgetsz(data, ifs)
1131 caddr_t data;
1132 ipf_stack_t *ifs;
1133 {
1134 	ap_session_t *aps;
1135 	nat_t *nat, *n;
1136 	natget_t ng;
1137 
1138 	BCOPYIN(data, &ng, sizeof(ng));
1139 
1140 	nat = ng.ng_ptr;
1141 	if (!nat) {
1142 		nat = ifs->ifs_nat_instances;
1143 		ng.ng_sz = 0;
1144 		/*
1145 		 * Empty list so the size returned is 0.  Simple.
1146 		 */
1147 		if (nat == NULL) {
1148 			BCOPYOUT(&ng, data, sizeof(ng));
1149 			return 0;
1150 		}
1151 	} else {
1152 		/*
1153 		 * Make sure the pointer we're copying from exists in the
1154 		 * current list of entries.  Security precaution to prevent
1155 		 * copying of random kernel data.
1156 		 */
1157 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1158 			if (n == nat)
1159 				break;
1160 		if (!n)
1161 			return ESRCH;
1162 	}
1163 
1164 	/*
1165 	 * Incluse any space required for proxy data structures.
1166 	 */
1167 	ng.ng_sz = sizeof(nat_save_t);
1168 	aps = nat->nat_aps;
1169 	if (aps != NULL) {
1170 		ng.ng_sz += sizeof(ap_session_t) - 4;
1171 		if (aps->aps_data != 0)
1172 			ng.ng_sz += aps->aps_psiz;
1173 	}
1174 
1175 	BCOPYOUT(&ng, data, sizeof(ng));
1176 	return 0;
1177 }
1178 
1179 
1180 /* ------------------------------------------------------------------------ */
1181 /* Function:    fr_natgetent                                                */
1182 /* Returns:     int - 0 == success, != 0 is the error value.                */
1183 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1184 /*                        to NAT structure to copy out.                     */
1185 /*                                                                          */
1186 /* Handle SIOCSTGET.                                                        */
1187 /* Copies out NAT entry to user space.  Any additional data held for a      */
1188 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1189 /* ------------------------------------------------------------------------ */
1190 static int fr_natgetent(data, ifs)
1191 caddr_t data;
1192 ipf_stack_t *ifs;
1193 {
1194 	int error, outsize;
1195 	ap_session_t *aps;
1196 	nat_save_t *ipn, ipns;
1197 	nat_t *n, *nat;
1198 
1199 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1200 	if (error != 0)
1201 		return error;
1202 
1203 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1204 		return EINVAL;
1205 
1206 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1207 	if (ipn == NULL)
1208 		return ENOMEM;
1209 
1210 	ipn->ipn_dsize = ipns.ipn_dsize;
1211 	nat = ipns.ipn_next;
1212 	if (nat == NULL) {
1213 		nat = ifs->ifs_nat_instances;
1214 		if (nat == NULL) {
1215 			if (ifs->ifs_nat_instances == NULL)
1216 				error = ENOENT;
1217 			goto finished;
1218 		}
1219 	} else {
1220 		/*
1221 		 * Make sure the pointer we're copying from exists in the
1222 		 * current list of entries.  Security precaution to prevent
1223 		 * copying of random kernel data.
1224 		 */
1225 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1226 			if (n == nat)
1227 				break;
1228 		if (n == NULL) {
1229 			error = ESRCH;
1230 			goto finished;
1231 		}
1232 	}
1233 	ipn->ipn_next = nat->nat_next;
1234 
1235 	/*
1236 	 * Copy the NAT structure.
1237 	 */
1238 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1239 
1240 	/*
1241 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1242 	 */
1243 	if (nat->nat_ptr != NULL)
1244 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1245 		      sizeof(ipn->ipn_ipnat));
1246 
1247 	/*
1248 	 * If we also know the NAT entry has an associated filter rule,
1249 	 * save that too.
1250 	 */
1251 	if (nat->nat_fr != NULL)
1252 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1253 		      sizeof(ipn->ipn_fr));
1254 
1255 	/*
1256 	 * Last but not least, if there is an application proxy session set
1257 	 * up for this NAT entry, then copy that out too, including any
1258 	 * private data saved along side it by the proxy.
1259 	 */
1260 	aps = nat->nat_aps;
1261 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1262 	if (aps != NULL) {
1263 		char *s;
1264 
1265 		if (outsize < sizeof(*aps)) {
1266 			error = ENOBUFS;
1267 			goto finished;
1268 		}
1269 
1270 		s = ipn->ipn_data;
1271 		bcopy((char *)aps, s, sizeof(*aps));
1272 		s += sizeof(*aps);
1273 		outsize -= sizeof(*aps);
1274 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1275 			bcopy(aps->aps_data, s, aps->aps_psiz);
1276 		else
1277 			error = ENOBUFS;
1278 	}
1279 	if (error == 0) {
1280 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1281 	}
1282 
1283 finished:
1284 	if (ipn != NULL) {
1285 		KFREES(ipn, ipns.ipn_dsize);
1286 	}
1287 	return error;
1288 }
1289 
1290 
1291 /* ------------------------------------------------------------------------ */
1292 /* Function:    fr_natputent                                                */
1293 /* Returns:     int - 0 == success, != 0 is the error value.                */
1294 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1295 /*                            structure information to load into the kernel */
1296 /*              getlock(I) - flag indicating whether or not a write lock    */
1297 /*                           on ipf_nat is already held.                    */
1298 /*                                                                          */
1299 /* Handle SIOCSTPUT.                                                        */
1300 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1301 /* firewall rule data structures, if pointers to them indicate so.          */
1302 /* ------------------------------------------------------------------------ */
1303 static int fr_natputent(data, getlock, ifs)
1304 caddr_t data;
1305 int getlock;
1306 ipf_stack_t *ifs;
1307 {
1308 	nat_save_t ipn, *ipnn;
1309 	ap_session_t *aps;
1310 	nat_t *n, *nat;
1311 	frentry_t *fr;
1312 	fr_info_t fin;
1313 	ipnat_t *in;
1314 	int error;
1315 
1316 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1317 	if (error != 0)
1318 		return error;
1319 
1320 	/*
1321 	 * Trigger automatic call to nat_extraflush() if the
1322 	 * table has reached capcity specified by hi watermark.
1323 	 */
1324 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1325 		ifs->ifs_nat_doflush = 1;
1326 
1327 	/*
1328 	 * Initialise early because of code at junkput label.
1329 	 */
1330 	in = NULL;
1331 	aps = NULL;
1332 	nat = NULL;
1333 	ipnn = NULL;
1334 
1335 	/*
1336 	 * New entry, copy in the rest of the NAT entry if it's size is more
1337 	 * than just the nat_t structure.
1338 	 */
1339 	fr = NULL;
1340 	if (ipn.ipn_dsize > sizeof(ipn)) {
1341 		if (ipn.ipn_dsize > 81920) {
1342 			error = ENOMEM;
1343 			goto junkput;
1344 		}
1345 
1346 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1347 		if (ipnn == NULL)
1348 			return ENOMEM;
1349 
1350 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1351 		if (error != 0) {
1352 			error = EFAULT;
1353 			goto junkput;
1354 		}
1355 	} else
1356 		ipnn = &ipn;
1357 
1358 	KMALLOC(nat, nat_t *);
1359 	if (nat == NULL) {
1360 		error = ENOMEM;
1361 		goto junkput;
1362 	}
1363 
1364 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1365 	/*
1366 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1367 	 */
1368 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1369 	nat->nat_tqe.tqe_pnext = NULL;
1370 	nat->nat_tqe.tqe_next = NULL;
1371 	nat->nat_tqe.tqe_ifq = NULL;
1372 	nat->nat_tqe.tqe_parent = nat;
1373 
1374 	/*
1375 	 * Restore the rule associated with this nat session
1376 	 */
1377 	in = ipnn->ipn_nat.nat_ptr;
1378 	if (in != NULL) {
1379 		KMALLOC(in, ipnat_t *);
1380 		nat->nat_ptr = in;
1381 		if (in == NULL) {
1382 			error = ENOMEM;
1383 			goto junkput;
1384 		}
1385 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1386 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1387 		in->in_use = 1;
1388 		in->in_flags |= IPN_DELETE;
1389 
1390 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1391 
1392 		if (nat_resolverule(in, ifs) != 0) {
1393 			error = ESRCH;
1394 			goto junkput;
1395 		}
1396 	}
1397 
1398 	/*
1399 	 * Check that the NAT entry doesn't already exist in the kernel.
1400 	 */
1401 	bzero((char *)&fin, sizeof(fin));
1402 	fin.fin_p = nat->nat_p;
1403 	fin.fin_ifs = ifs;
1404 	if (nat->nat_dir == NAT_OUTBOUND) {
1405 		fin.fin_data[0] = ntohs(nat->nat_oport);
1406 		fin.fin_data[1] = ntohs(nat->nat_outport);
1407 		fin.fin_ifp = nat->nat_ifps[0];
1408 		if (getlock) {
1409 			READ_ENTER(&ifs->ifs_ipf_nat);
1410 		}
1411 		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1412 			nat->nat_oip, nat->nat_outip);
1413 		if (getlock) {
1414 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1415 		}
1416 		if (n != NULL) {
1417 			error = EEXIST;
1418 			goto junkput;
1419 		}
1420 	} else if (nat->nat_dir == NAT_INBOUND) {
1421 		fin.fin_data[0] = ntohs(nat->nat_inport);
1422 		fin.fin_data[1] = ntohs(nat->nat_oport);
1423 		fin.fin_ifp = nat->nat_ifps[1];
1424 		if (getlock) {
1425 			READ_ENTER(&ifs->ifs_ipf_nat);
1426 		}
1427 		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1428 			nat->nat_inip, nat->nat_oip);
1429 		if (getlock) {
1430 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1431 		}
1432 		if (n != NULL) {
1433 			error = EEXIST;
1434 			goto junkput;
1435 		}
1436 	} else {
1437 		error = EINVAL;
1438 		goto junkput;
1439 	}
1440 
1441 	/*
1442 	 * Restore ap_session_t structure.  Include the private data allocated
1443 	 * if it was there.
1444 	 */
1445 	aps = nat->nat_aps;
1446 	if (aps != NULL) {
1447 		KMALLOC(aps, ap_session_t *);
1448 		nat->nat_aps = aps;
1449 		if (aps == NULL) {
1450 			error = ENOMEM;
1451 			goto junkput;
1452 		}
1453 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1454 		if (in != NULL)
1455 			aps->aps_apr = in->in_apr;
1456 		else
1457 			aps->aps_apr = NULL;
1458 		if (aps->aps_psiz != 0) {
1459 			if (aps->aps_psiz > 81920) {
1460 				error = ENOMEM;
1461 				goto junkput;
1462 			}
1463 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1464 			if (aps->aps_data == NULL) {
1465 				error = ENOMEM;
1466 				goto junkput;
1467 			}
1468 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1469 			      aps->aps_psiz);
1470 		} else {
1471 			aps->aps_psiz = 0;
1472 			aps->aps_data = NULL;
1473 		}
1474 	}
1475 
1476 	/*
1477 	 * If there was a filtering rule associated with this entry then
1478 	 * build up a new one.
1479 	 */
1480 	fr = nat->nat_fr;
1481 	if (fr != NULL) {
1482 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1483 			KMALLOC(fr, frentry_t *);
1484 			nat->nat_fr = fr;
1485 			if (fr == NULL) {
1486 				error = ENOMEM;
1487 				goto junkput;
1488 			}
1489 			ipnn->ipn_nat.nat_fr = fr;
1490 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1491 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1492 
1493 			fr->fr_ref = 1;
1494 			fr->fr_dsize = 0;
1495 			fr->fr_data = NULL;
1496 			fr->fr_type = FR_T_NONE;
1497 
1498 			MUTEX_NUKE(&fr->fr_lock);
1499 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1500 		} else {
1501 			if (getlock) {
1502 				READ_ENTER(&ifs->ifs_ipf_nat);
1503 			}
1504 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1505 				if (n->nat_fr == fr)
1506 					break;
1507 
1508 			if (n != NULL) {
1509 				MUTEX_ENTER(&fr->fr_lock);
1510 				fr->fr_ref++;
1511 				MUTEX_EXIT(&fr->fr_lock);
1512 			}
1513 			if (getlock) {
1514 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1515 			}
1516 			if (!n) {
1517 				error = ESRCH;
1518 				goto junkput;
1519 			}
1520 		}
1521 	}
1522 
1523 	if (ipnn != &ipn) {
1524 		KFREES(ipnn, ipn.ipn_dsize);
1525 		ipnn = NULL;
1526 	}
1527 
1528 	if (getlock) {
1529 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1530 	}
1531 	error = nat_insert(nat, nat->nat_rev, ifs);
1532 	if ((error == 0) && (aps != NULL)) {
1533 		aps->aps_next = ifs->ifs_ap_sess_list;
1534 		ifs->ifs_ap_sess_list = aps;
1535 	}
1536 	if (getlock) {
1537 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1538 	}
1539 
1540 	if (error == 0)
1541 		return 0;
1542 
1543 	error = ENOMEM;
1544 
1545 junkput:
1546 	if (fr != NULL)
1547 		(void) fr_derefrule(&fr, ifs);
1548 
1549 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1550 		KFREES(ipnn, ipn.ipn_dsize);
1551 	}
1552 	if (nat != NULL) {
1553 		if (aps != NULL) {
1554 			if (aps->aps_data != NULL) {
1555 				KFREES(aps->aps_data, aps->aps_psiz);
1556 			}
1557 			KFREE(aps);
1558 		}
1559 		if (in != NULL) {
1560 			if (in->in_apr)
1561 				appr_free(in->in_apr);
1562 			KFREE(in);
1563 		}
1564 		KFREE(nat);
1565 	}
1566 	return error;
1567 }
1568 
1569 
1570 /* ------------------------------------------------------------------------ */
1571 /* Function:    nat_delete                                                  */
1572 /* Returns:     Nil                                                         */
1573 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1574 /*              logtype(I) - type of LOG record to create before deleting   */
1575 /* Write Lock:  ipf_nat                                                     */
1576 /*                                                                          */
1577 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1578 /* enabled then generate a NAT log record for this event.                   */
1579 /* ------------------------------------------------------------------------ */
1580 static void nat_delete(nat, logtype, ifs)
1581 struct nat *nat;
1582 int logtype;
1583 ipf_stack_t *ifs;
1584 {
1585 	struct ipnat *ipn;
1586 
1587 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1588 		nat_log(nat, logtype, ifs);
1589 
1590 	/*
1591 	 * Take it as a general indication that all the pointers are set if
1592 	 * nat_pnext is set.
1593 	 */
1594 	if (nat->nat_pnext != NULL) {
1595 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1596 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1597 
1598 		*nat->nat_pnext = nat->nat_next;
1599 		if (nat->nat_next != NULL) {
1600 			nat->nat_next->nat_pnext = nat->nat_pnext;
1601 			nat->nat_next = NULL;
1602 		}
1603 		nat->nat_pnext = NULL;
1604 
1605 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1606 		if (nat->nat_hnext[0] != NULL) {
1607 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1608 			nat->nat_hnext[0] = NULL;
1609 		}
1610 		nat->nat_phnext[0] = NULL;
1611 
1612 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1613 		if (nat->nat_hnext[1] != NULL) {
1614 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1615 			nat->nat_hnext[1] = NULL;
1616 		}
1617 		nat->nat_phnext[1] = NULL;
1618 
1619 		if ((nat->nat_flags & SI_WILDP) != 0)
1620 			ifs->ifs_nat_stats.ns_wilds--;
1621 	}
1622 
1623 	if (nat->nat_me != NULL) {
1624 		*nat->nat_me = NULL;
1625 		nat->nat_me = NULL;
1626 	}
1627 
1628 	fr_deletequeueentry(&nat->nat_tqe);
1629 
1630 	MUTEX_ENTER(&nat->nat_lock);
1631 	if (nat->nat_ref > 1) {
1632 		nat->nat_ref--;
1633 		MUTEX_EXIT(&nat->nat_lock);
1634 		return;
1635 	}
1636 	MUTEX_EXIT(&nat->nat_lock);
1637 
1638 	/*
1639 	 * At this point, nat_ref is 1, doing "--" would make it 0..
1640 	 */
1641 	nat->nat_ref = 0;
1642 
1643 #ifdef	IPFILTER_SYNC
1644 	if (nat->nat_sync)
1645 		ipfsync_del(nat->nat_sync);
1646 #endif
1647 
1648 	if (nat->nat_fr != NULL)
1649 		(void)fr_derefrule(&nat->nat_fr, ifs);
1650 
1651 	if (nat->nat_hm != NULL)
1652 		nat_hostmapdel(nat->nat_hm);
1653 
1654 	/*
1655 	 * If there is an active reference from the nat entry to its parent
1656 	 * rule, decrement the rule's reference count and free it too if no
1657 	 * longer being used.
1658 	 */
1659 	ipn = nat->nat_ptr;
1660 	if (ipn != NULL) {
1661 		ipn->in_space++;
1662 		ipn->in_use--;
1663 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1664 			if (ipn->in_apr)
1665 				appr_free(ipn->in_apr);
1666 			KFREE(ipn);
1667 			ifs->ifs_nat_stats.ns_rules--;
1668 		}
1669 	}
1670 
1671 	MUTEX_DESTROY(&nat->nat_lock);
1672 
1673 	aps_free(nat->nat_aps, ifs);
1674 	ifs->ifs_nat_stats.ns_inuse--;
1675 
1676 	/*
1677 	 * If there's a fragment table entry too for this nat entry, then
1678 	 * dereference that as well.  This is after nat_lock is released
1679 	 * because of Tru64.
1680 	 */
1681 	fr_forgetnat((void *)nat, ifs);
1682 
1683 	KFREE(nat);
1684 }
1685 
1686 
1687 /* ------------------------------------------------------------------------ */
1688 /* Function:    nat_flushtable                                              */
1689 /* Returns:     int - number of NAT rules deleted                           */
1690 /* Parameters:  Nil                                                         */
1691 /*                                                                          */
1692 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1693 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1694 /* ------------------------------------------------------------------------ */
1695 /*
1696  * nat_flushtable - clear the NAT table of all mapping entries.
1697  */
1698 static int nat_flushtable(ifs)
1699 ipf_stack_t *ifs;
1700 {
1701 	nat_t *nat;
1702 	int j = 0;
1703 
1704 	/*
1705 	 * ALL NAT mappings deleted, so lets just make the deletions
1706 	 * quicker.
1707 	 */
1708 	if (ifs->ifs_nat_table[0] != NULL)
1709 		bzero((char *)ifs->ifs_nat_table[0],
1710 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
1711 	if (ifs->ifs_nat_table[1] != NULL)
1712 		bzero((char *)ifs->ifs_nat_table[1],
1713 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
1714 
1715 	while ((nat = ifs->ifs_nat_instances) != NULL) {
1716 		nat_delete(nat, NL_FLUSH, ifs);
1717 		j++;
1718 	}
1719 
1720 	ifs->ifs_nat_stats.ns_inuse = 0;
1721 	return j;
1722 }
1723 
1724 
1725 /* ------------------------------------------------------------------------ */
1726 /* Function:    nat_clearlist                                               */
1727 /* Returns:     int - number of NAT/RDR rules deleted                       */
1728 /* Parameters:  Nil                                                         */
1729 /*                                                                          */
1730 /* Delete all rules in the current list of rules.  There is nothing elegant */
1731 /* about this cleanup: simply free all entries on the list of rules and     */
1732 /* clear out the tables used for hashed NAT rule lookups.                   */
1733 /* ------------------------------------------------------------------------ */
1734 static int nat_clearlist(ifs)
1735 ipf_stack_t *ifs;
1736 {
1737 	ipnat_t *n, **np = &ifs->ifs_nat_list;
1738 	int i = 0;
1739 
1740 	if (ifs->ifs_nat_rules != NULL)
1741 		bzero((char *)ifs->ifs_nat_rules,
1742 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
1743 	if (ifs->ifs_rdr_rules != NULL)
1744 		bzero((char *)ifs->ifs_rdr_rules,
1745 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
1746 
1747 	while ((n = *np) != NULL) {
1748 		*np = n->in_next;
1749 		if (n->in_use == 0) {
1750 			if (n->in_apr != NULL)
1751 				appr_free(n->in_apr);
1752 			KFREE(n);
1753 			ifs->ifs_nat_stats.ns_rules--;
1754 		} else {
1755 			n->in_flags |= IPN_DELETE;
1756 			n->in_next = NULL;
1757 		}
1758 		i++;
1759 	}
1760 	ifs->ifs_nat_masks = 0;
1761 	ifs->ifs_rdr_masks = 0;
1762 	return i;
1763 }
1764 
1765 
1766 /* ------------------------------------------------------------------------ */
1767 /* Function:    nat_newmap                                                  */
1768 /* Returns:     int - -1 == error, 0 == success                             */
1769 /* Parameters:  fin(I) - pointer to packet information                      */
1770 /*              nat(I) - pointer to NAT entry                               */
1771 /*              ni(I)  - pointer to structure with misc. information needed */
1772 /*                       to create new NAT entry.                           */
1773 /*                                                                          */
1774 /* Given an empty NAT structure, populate it with new information about a   */
1775 /* new NAT session, as defined by the matching NAT rule.                    */
1776 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1777 /* to the new IP address for the translation.                               */
1778 /* ------------------------------------------------------------------------ */
1779 static INLINE int nat_newmap(fin, nat, ni)
1780 fr_info_t *fin;
1781 nat_t *nat;
1782 natinfo_t *ni;
1783 {
1784 	u_short st_port, dport, sport, port, sp, dp;
1785 	struct in_addr in, inb;
1786 	hostmap_t *hm;
1787 	u_32_t flags;
1788 	u_32_t st_ip;
1789 	ipnat_t *np;
1790 	nat_t *natl;
1791 	int l;
1792 	ipf_stack_t *ifs = fin->fin_ifs;
1793 
1794 	/*
1795 	 * If it's an outbound packet which doesn't match any existing
1796 	 * record, then create a new port
1797 	 */
1798 	l = 0;
1799 	hm = NULL;
1800 	np = ni->nai_np;
1801 	st_ip = np->in_nip;
1802 	st_port = np->in_pnext;
1803 	flags = ni->nai_flags;
1804 	sport = ni->nai_sport;
1805 	dport = ni->nai_dport;
1806 
1807 	/*
1808 	 * Do a loop until we either run out of entries to try or we find
1809 	 * a NAT mapping that isn't currently being used.  This is done
1810 	 * because the change to the source is not (usually) being fixed.
1811 	 */
1812 	do {
1813 		port = 0;
1814 		in.s_addr = htonl(np->in_nip);
1815 		if (l == 0) {
1816 			/*
1817 			 * Check to see if there is an existing NAT
1818 			 * setup for this IP address pair.
1819 			 */
1820 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1821 					 in, 0, ifs);
1822 			if (hm != NULL)
1823 				in.s_addr = hm->hm_mapip.s_addr;
1824 		} else if ((l == 1) && (hm != NULL)) {
1825 			nat_hostmapdel(hm);
1826 			hm = NULL;
1827 		}
1828 		in.s_addr = ntohl(in.s_addr);
1829 
1830 		nat->nat_hm = hm;
1831 
1832 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1833 			if (l > 0)
1834 				return -1;
1835 		}
1836 
1837 		if (np->in_redir == NAT_BIMAP &&
1838 		    np->in_inmsk == np->in_outmsk) {
1839 			/*
1840 			 * map the address block in a 1:1 fashion
1841 			 */
1842 			in.s_addr = np->in_outip;
1843 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1844 			in.s_addr = ntohl(in.s_addr);
1845 
1846 		} else if (np->in_redir & NAT_MAPBLK) {
1847 			if ((l >= np->in_ppip) || ((l > 0) &&
1848 			     !(flags & IPN_TCPUDP)))
1849 				return -1;
1850 			/*
1851 			 * map-block - Calculate destination address.
1852 			 */
1853 			in.s_addr = ntohl(fin->fin_saddr);
1854 			in.s_addr &= ntohl(~np->in_inmsk);
1855 			inb.s_addr = in.s_addr;
1856 			in.s_addr /= np->in_ippip;
1857 			in.s_addr &= ntohl(~np->in_outmsk);
1858 			in.s_addr += ntohl(np->in_outip);
1859 			/*
1860 			 * Calculate destination port.
1861 			 */
1862 			if ((flags & IPN_TCPUDP) &&
1863 			    (np->in_ppip != 0)) {
1864 				port = ntohs(sport) + l;
1865 				port %= np->in_ppip;
1866 				port += np->in_ppip *
1867 					(inb.s_addr % np->in_ippip);
1868 				port += MAPBLK_MINPORT;
1869 				port = htons(port);
1870 			}
1871 
1872 		} else if ((np->in_outip == 0) &&
1873 			   (np->in_outmsk == 0xffffffff)) {
1874 			/*
1875 			 * 0/32 - use the interface's IP address.
1876 			 */
1877 			if ((l > 0) ||
1878 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1879 				       &in, NULL, fin->fin_ifs) == -1)
1880 				return -1;
1881 			in.s_addr = ntohl(in.s_addr);
1882 
1883 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1884 			/*
1885 			 * 0/0 - use the original source address/port.
1886 			 */
1887 			if (l > 0)
1888 				return -1;
1889 			in.s_addr = ntohl(fin->fin_saddr);
1890 
1891 		} else if ((np->in_outmsk != 0xffffffff) &&
1892 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1893 			np->in_nip++;
1894 
1895 		natl = NULL;
1896 
1897 		if ((flags & IPN_TCPUDP) &&
1898 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
1899 		    (np->in_flags & IPN_AUTOPORTMAP)) {
1900 			/*
1901 			 * "ports auto" (without map-block)
1902 			 */
1903 			if ((l > 0) && (l % np->in_ppip == 0)) {
1904 				if (l > np->in_space) {
1905 					return -1;
1906 				} else if ((l > np->in_ppip) &&
1907 					   np->in_outmsk != 0xffffffff)
1908 					np->in_nip++;
1909 			}
1910 			if (np->in_ppip != 0) {
1911 				port = ntohs(sport);
1912 				port += (l % np->in_ppip);
1913 				port %= np->in_ppip;
1914 				port += np->in_ppip *
1915 					(ntohl(fin->fin_saddr) %
1916 					 np->in_ippip);
1917 				port += MAPBLK_MINPORT;
1918 				port = htons(port);
1919 			}
1920 
1921 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1922 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
1923 			/*
1924 			 * Standard port translation.  Select next port.
1925 			 */
1926 			port = htons(np->in_pnext++);
1927 
1928 			if (np->in_pnext > ntohs(np->in_pmax)) {
1929 				np->in_pnext = ntohs(np->in_pmin);
1930 				if (np->in_outmsk != 0xffffffff)
1931 					np->in_nip++;
1932 			}
1933 		}
1934 
1935 		if (np->in_flags & IPN_IPRANGE) {
1936 			if (np->in_nip > ntohl(np->in_outmsk))
1937 				np->in_nip = ntohl(np->in_outip);
1938 		} else {
1939 			if ((np->in_outmsk != 0xffffffff) &&
1940 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1941 			    ntohl(np->in_outip))
1942 				np->in_nip = ntohl(np->in_outip) + 1;
1943 		}
1944 
1945 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
1946 			port = sport;
1947 
1948 		/*
1949 		 * Here we do a lookup of the connection as seen from
1950 		 * the outside.  If an IP# pair already exists, try
1951 		 * again.  So if you have A->B becomes C->B, you can
1952 		 * also have D->E become C->E but not D->B causing
1953 		 * another C->B.  Also take protocol and ports into
1954 		 * account when determining whether a pre-existing
1955 		 * NAT setup will cause an external conflict where
1956 		 * this is appropriate.
1957 		 */
1958 		inb.s_addr = htonl(in.s_addr);
1959 		sp = fin->fin_data[0];
1960 		dp = fin->fin_data[1];
1961 		fin->fin_data[0] = fin->fin_data[1];
1962 		fin->fin_data[1] = htons(port);
1963 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
1964 				    (u_int)fin->fin_p, fin->fin_dst, inb);
1965 		fin->fin_data[0] = sp;
1966 		fin->fin_data[1] = dp;
1967 
1968 		/*
1969 		 * Has the search wrapped around and come back to the
1970 		 * start ?
1971 		 */
1972 		if ((natl != NULL) &&
1973 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1974 		    (np->in_nip != 0) && (st_ip == np->in_nip))
1975 			return -1;
1976 		l++;
1977 	} while (natl != NULL);
1978 
1979 	if (np->in_space > 0)
1980 		np->in_space--;
1981 
1982 	/* Setup the NAT table */
1983 	nat->nat_inip = fin->fin_src;
1984 	nat->nat_outip.s_addr = htonl(in.s_addr);
1985 	nat->nat_oip = fin->fin_dst;
1986 	if (nat->nat_hm == NULL)
1987 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1988 					  nat->nat_outip, 0, ifs);
1989 
1990 	/*
1991 	 * The ICMP checksum does not have a pseudo header containing
1992 	 * the IP addresses
1993 	 */
1994 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1995 	ni->nai_sum2 = LONG_SUM(in.s_addr);
1996 	if ((flags & IPN_TCPUDP)) {
1997 		ni->nai_sum1 += ntohs(sport);
1998 		ni->nai_sum2 += ntohs(port);
1999 	}
2000 
2001 	if (flags & IPN_TCPUDP) {
2002 		nat->nat_inport = sport;
2003 		nat->nat_outport = port;	/* sport */
2004 		nat->nat_oport = dport;
2005 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2006 	} else if (flags & IPN_ICMPQUERY) {
2007 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2008 		nat->nat_inport = port;
2009 		nat->nat_outport = port;
2010 	}
2011 
2012 	ni->nai_ip.s_addr = in.s_addr;
2013 	ni->nai_port = port;
2014 	ni->nai_nport = dport;
2015 	return 0;
2016 }
2017 
2018 
2019 /* ------------------------------------------------------------------------ */
2020 /* Function:    nat_newrdr                                                  */
2021 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2022 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2023 /* Parameters:  fin(I) - pointer to packet information                      */
2024 /*              nat(I) - pointer to NAT entry                               */
2025 /*              ni(I)  - pointer to structure with misc. information needed */
2026 /*                       to create new NAT entry.                           */
2027 /*                                                                          */
2028 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2029 /* to the new IP address for the translation.                               */
2030 /* ------------------------------------------------------------------------ */
2031 static INLINE int nat_newrdr(fin, nat, ni)
2032 fr_info_t *fin;
2033 nat_t *nat;
2034 natinfo_t *ni;
2035 {
2036 	u_short nport, dport, sport;
2037 	struct in_addr in;
2038 	hostmap_t *hm;
2039 	u_32_t flags;
2040 	ipnat_t *np;
2041 	int move;
2042 	ipf_stack_t *ifs = fin->fin_ifs;
2043 
2044 	move = 1;
2045 	hm = NULL;
2046 	in.s_addr = 0;
2047 	np = ni->nai_np;
2048 	flags = ni->nai_flags;
2049 	sport = ni->nai_sport;
2050 	dport = ni->nai_dport;
2051 
2052 	/*
2053 	 * If the matching rule has IPN_STICKY set, then we want to have the
2054 	 * same rule kick in as before.  Why would this happen?  If you have
2055 	 * a collection of rdr rules with "round-robin sticky", the current
2056 	 * packet might match a different one to the previous connection but
2057 	 * we want the same destination to be used.
2058 	 */
2059 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2060 	    (IPN_ROUNDR|IPN_STICKY)) {
2061 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2062 				 (u_32_t)dport, ifs);
2063 		if (hm != NULL) {
2064 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2065 			np = hm->hm_ipnat;
2066 			ni->nai_np = np;
2067 			move = 0;
2068 		}
2069 	}
2070 
2071 	/*
2072 	 * Otherwise, it's an inbound packet. Most likely, we don't
2073 	 * want to rewrite source ports and source addresses. Instead,
2074 	 * we want to rewrite to a fixed internal address and fixed
2075 	 * internal port.
2076 	 */
2077 	if (np->in_flags & IPN_SPLIT) {
2078 		in.s_addr = np->in_nip;
2079 
2080 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2081 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2082 					 in, (u_32_t)dport, ifs);
2083 			if (hm != NULL) {
2084 				in.s_addr = hm->hm_mapip.s_addr;
2085 				move = 0;
2086 			}
2087 		}
2088 
2089 		if (hm == NULL || hm->hm_ref == 1) {
2090 			if (np->in_inip == htonl(in.s_addr)) {
2091 				np->in_nip = ntohl(np->in_inmsk);
2092 				move = 0;
2093 			} else {
2094 				np->in_nip = ntohl(np->in_inip);
2095 			}
2096 		}
2097 
2098 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2099 		/*
2100 		 * 0/32 - use the interface's IP address.
2101 		 */
2102 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2103 			   fin->fin_ifs) == -1)
2104 			return -1;
2105 		in.s_addr = ntohl(in.s_addr);
2106 
2107 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2108 		/*
2109 		 * 0/0 - use the original destination address/port.
2110 		 */
2111 		in.s_addr = ntohl(fin->fin_daddr);
2112 
2113 	} else if (np->in_redir == NAT_BIMAP &&
2114 		   np->in_inmsk == np->in_outmsk) {
2115 		/*
2116 		 * map the address block in a 1:1 fashion
2117 		 */
2118 		in.s_addr = np->in_inip;
2119 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2120 		in.s_addr = ntohl(in.s_addr);
2121 	} else {
2122 		in.s_addr = ntohl(np->in_inip);
2123 	}
2124 
2125 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2126 		nport = dport;
2127 	else {
2128 		/*
2129 		 * Whilst not optimized for the case where
2130 		 * pmin == pmax, the gain is not significant.
2131 		 */
2132 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2133 		    (np->in_pmin != np->in_pmax)) {
2134 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2135 				ntohs(np->in_pnext);
2136 			nport = htons(nport);
2137 		} else
2138 			nport = np->in_pnext;
2139 	}
2140 
2141 	/*
2142 	 * When the redirect-to address is set to 0.0.0.0, just
2143 	 * assume a blank `forwarding' of the packet.  We don't
2144 	 * setup any translation for this either.
2145 	 */
2146 	if (in.s_addr == 0) {
2147 		if (nport == dport)
2148 			return -1;
2149 		in.s_addr = ntohl(fin->fin_daddr);
2150 	}
2151 
2152 	nat->nat_inip.s_addr = htonl(in.s_addr);
2153 	nat->nat_outip = fin->fin_dst;
2154 	nat->nat_oip = fin->fin_src;
2155 
2156 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2157 	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2158 
2159 	ni->nai_ip.s_addr = in.s_addr;
2160 	ni->nai_nport = nport;
2161 	ni->nai_port = sport;
2162 
2163 	if (flags & IPN_TCPUDP) {
2164 		nat->nat_inport = nport;
2165 		nat->nat_outport = dport;
2166 		nat->nat_oport = sport;
2167 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2168 	} else if (flags & IPN_ICMPQUERY) {
2169 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2170 		nat->nat_inport = nport;
2171 		nat->nat_outport = nport;
2172 	}
2173 
2174 	return move;
2175 }
2176 
2177 /* ------------------------------------------------------------------------ */
2178 /* Function:    nat_new                                                     */
2179 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2180 /*                       else pointer to new NAT structure                  */
2181 /* Parameters:  fin(I)       - pointer to packet information                */
2182 /*              np(I)        - pointer to NAT rule                          */
2183 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2184 /*              flags(I)     - flags describing the current packet          */
2185 /*              direction(I) - direction of packet (in/out)                 */
2186 /* Write Lock:  ipf_nat                                                     */
2187 /*                                                                          */
2188 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2189 /* in any way.                                                              */
2190 /*                                                                          */
2191 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2192 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2193 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2194 /* and (3) building that structure and putting it into the NAT table(s).    */
2195 /* ------------------------------------------------------------------------ */
2196 nat_t *nat_new(fin, np, natsave, flags, direction)
2197 fr_info_t *fin;
2198 ipnat_t *np;
2199 nat_t **natsave;
2200 u_int flags;
2201 int direction;
2202 {
2203 	u_short port = 0, sport = 0, dport = 0, nport = 0;
2204 	tcphdr_t *tcp = NULL;
2205 	hostmap_t *hm = NULL;
2206 	struct in_addr in;
2207 	nat_t *nat, *natl;
2208 	u_int nflags;
2209 	natinfo_t ni;
2210 	u_32_t sumd;
2211 	int move;
2212 	ipf_stack_t *ifs = fin->fin_ifs;
2213 
2214 	/*
2215 	 * Trigger automatic call to nat_extraflush() if the
2216 	 * table has reached capcity specified by hi watermark.
2217 	 */
2218 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2219 		ifs->ifs_nat_doflush = 1;
2220 
2221 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2222 		ifs->ifs_nat_stats.ns_memfail++;
2223 		return NULL;
2224 	}
2225 
2226 	move = 1;
2227 	nflags = np->in_flags & flags;
2228 	nflags &= NAT_FROMRULE;
2229 
2230 	ni.nai_np = np;
2231 	ni.nai_nflags = nflags;
2232 	ni.nai_flags = flags;
2233 
2234 	/* Give me a new nat */
2235 	KMALLOC(nat, nat_t *);
2236 	if (nat == NULL) {
2237 		ifs->ifs_nat_stats.ns_memfail++;
2238 		/*
2239 		 * Try to automatically tune the max # of entries in the
2240 		 * table allowed to be less than what will cause kmem_alloc()
2241 		 * to fail and try to eliminate panics due to out of memory
2242 		 * conditions arising.
2243 		 */
2244 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2245 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2246 			printf("ipf_nattable_max reduced to %d\n",
2247 				ifs->ifs_ipf_nattable_max);
2248 		}
2249 		return NULL;
2250 	}
2251 
2252 	if (flags & IPN_TCPUDP) {
2253 		tcp = fin->fin_dp;
2254 		ni.nai_sport = htons(fin->fin_sport);
2255 		ni.nai_dport = htons(fin->fin_dport);
2256 	} else if (flags & IPN_ICMPQUERY) {
2257 		/*
2258 		 * In the ICMP query NAT code, we translate the ICMP id fields
2259 		 * to make them unique. This is indepedent of the ICMP type
2260 		 * (e.g. in the unlikely event that a host sends an echo and
2261 		 * an tstamp request with the same id, both packets will have
2262 		 * their ip address/id field changed in the same way).
2263 		 */
2264 		/* The icmp_id field is used by the sender to identify the
2265 		 * process making the icmp request. (the receiver justs
2266 		 * copies it back in its response). So, it closely matches
2267 		 * the concept of source port. We overlay sport, so we can
2268 		 * maximally reuse the existing code.
2269 		 */
2270 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2271 		ni.nai_dport = ni.nai_sport;
2272 	}
2273 
2274 	bzero((char *)nat, sizeof(*nat));
2275 	nat->nat_flags = flags;
2276 	nat->nat_redir = np->in_redir;
2277 
2278 	if ((flags & NAT_SLAVE) == 0) {
2279 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2280 	}
2281 
2282 	/*
2283 	 * Search the current table for a match.
2284 	 */
2285 	if (direction == NAT_OUTBOUND) {
2286 		/*
2287 		 * We can now arrange to call this for the same connection
2288 		 * because ipf_nat_new doesn't protect the code path into
2289 		 * this function.
2290 		 */
2291 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2292 				     fin->fin_src, fin->fin_dst);
2293 		if (natl != NULL) {
2294 			KFREE(nat);
2295 			nat = natl;
2296 			goto done;
2297 		}
2298 
2299 		move = nat_newmap(fin, nat, &ni);
2300 		if (move == -1)
2301 			goto badnat;
2302 
2303 		np = ni.nai_np;
2304 		in = ni.nai_ip;
2305 	} else {
2306 		/*
2307 		 * NAT_INBOUND is used only for redirects rules
2308 		 */
2309 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2310 				    fin->fin_src, fin->fin_dst);
2311 		if (natl != NULL) {
2312 			KFREE(nat);
2313 			nat = natl;
2314 			goto done;
2315 		}
2316 
2317 		move = nat_newrdr(fin, nat, &ni);
2318 		if (move == -1)
2319 			goto badnat;
2320 
2321 		np = ni.nai_np;
2322 		in = ni.nai_ip;
2323 	}
2324 	port = ni.nai_port;
2325 	nport = ni.nai_nport;
2326 
2327 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2328 		if (np->in_redir == NAT_REDIRECT) {
2329 			nat_delrdr(np);
2330 			nat_addrdr(np, ifs);
2331 		} else if (np->in_redir == NAT_MAP) {
2332 			nat_delnat(np);
2333 			nat_addnat(np, ifs);
2334 		}
2335 	}
2336 
2337 	if (flags & IPN_TCPUDP) {
2338 		sport = ni.nai_sport;
2339 		dport = ni.nai_dport;
2340 	} else if (flags & IPN_ICMPQUERY) {
2341 		sport = ni.nai_sport;
2342 		dport = 0;
2343 	}
2344 
2345 	/*
2346 	 * nat_sumd[0] stores adjustment value including both IP address and
2347 	 * port number changes. nat_sumd[1] stores adjustment value only for
2348 	 * IP address changes, to be used for pseudo header adjustment, in
2349 	 * case hardware partial checksum offload is offered.
2350 	 */
2351 	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2352 	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2353 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
2354 	if (flags & IPN_TCPUDP) {
2355 		ni.nai_sum1 = LONG_SUM(in.s_addr);
2356 		if (direction == NAT_OUTBOUND)
2357 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_saddr));
2358 		else
2359 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_daddr));
2360 
2361 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2362 		nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
2363 	} else
2364 #endif
2365 		nat->nat_sumd[1] = nat->nat_sumd[0];
2366 
2367 	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2368 		if (direction == NAT_OUTBOUND)
2369 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2370 		else
2371 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2372 
2373 		ni.nai_sum2 = LONG_SUM(in.s_addr);
2374 
2375 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2376 		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2377 	} else {
2378 		nat->nat_ipsumd = nat->nat_sumd[0];
2379 		if (!(flags & IPN_TCPUDPICMP)) {
2380 			nat->nat_sumd[0] = 0;
2381 			nat->nat_sumd[1] = 0;
2382 		}
2383 	}
2384 
2385 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2386 		goto badnat;
2387 	}
2388 	if (flags & SI_WILDP)
2389 		ifs->ifs_nat_stats.ns_wilds++;
2390 	goto done;
2391 badnat:
2392 	ifs->ifs_nat_stats.ns_badnat++;
2393 	if ((hm = nat->nat_hm) != NULL)
2394 		nat_hostmapdel(hm);
2395 	KFREE(nat);
2396 	nat = NULL;
2397 done:
2398 	if ((flags & NAT_SLAVE) == 0) {
2399 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2400 	}
2401 	return nat;
2402 }
2403 
2404 
2405 /* ------------------------------------------------------------------------ */
2406 /* Function:    nat_finalise                                                */
2407 /* Returns:     int - 0 == sucess, -1 == failure                            */
2408 /* Parameters:  fin(I) - pointer to packet information                      */
2409 /*              nat(I) - pointer to NAT entry                               */
2410 /*              ni(I)  - pointer to structure with misc. information needed */
2411 /*                       to create new NAT entry.                           */
2412 /* Write Lock:  ipf_nat                                                     */
2413 /*                                                                          */
2414 /* This is the tail end of constructing a new NAT entry and is the same     */
2415 /* for both IPv4 and IPv6.                                                  */
2416 /* ------------------------------------------------------------------------ */
2417 /*ARGSUSED*/
2418 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2419 fr_info_t *fin;
2420 nat_t *nat;
2421 natinfo_t *ni;
2422 tcphdr_t *tcp;
2423 nat_t **natsave;
2424 int direction;
2425 {
2426 	frentry_t *fr;
2427 	ipnat_t *np;
2428 	ipf_stack_t *ifs = fin->fin_ifs;
2429 
2430 	np = ni->nai_np;
2431 
2432 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2433 
2434 #ifdef	IPFILTER_SYNC
2435 	if ((nat->nat_flags & SI_CLONE) == 0)
2436 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2437 #endif
2438 
2439 	nat->nat_me = natsave;
2440 	nat->nat_dir = direction;
2441 	nat->nat_ifps[0] = np->in_ifps[0];
2442 	nat->nat_ifps[1] = np->in_ifps[1];
2443 	nat->nat_ptr = np;
2444 	nat->nat_p = fin->fin_p;
2445 	nat->nat_mssclamp = np->in_mssclamp;
2446 	fr = fin->fin_fr;
2447 	nat->nat_fr = fr;
2448 
2449 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2450 		if (appr_new(fin, nat) == -1)
2451 			return -1;
2452 
2453 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2454 		if (ifs->ifs_nat_logging)
2455 			nat_log(nat, (u_int)np->in_redir, ifs);
2456 		np->in_use++;
2457 		if (fr != NULL) {
2458 			MUTEX_ENTER(&fr->fr_lock);
2459 			fr->fr_ref++;
2460 			MUTEX_EXIT(&fr->fr_lock);
2461 		}
2462 		return 0;
2463 	}
2464 
2465 	/*
2466 	 * nat_insert failed, so cleanup time...
2467 	 */
2468 	return -1;
2469 }
2470 
2471 
2472 /* ------------------------------------------------------------------------ */
2473 /* Function:   nat_insert                                                   */
2474 /* Returns:    int - 0 == sucess, -1 == failure                             */
2475 /* Parameters: nat(I) - pointer to NAT structure                            */
2476 /*             rev(I) - flag indicating forward/reverse direction of packet */
2477 /* Write Lock: ipf_nat                                                      */
2478 /*                                                                          */
2479 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2480 /* list of active NAT entries.  Adjust global counters when complete.       */
2481 /* ------------------------------------------------------------------------ */
2482 int	nat_insert(nat, rev, ifs)
2483 nat_t	*nat;
2484 int	rev;
2485 ipf_stack_t *ifs;
2486 {
2487 	u_int hv1, hv2;
2488 	nat_t **natp;
2489 
2490 	/*
2491 	 * Try and return an error as early as possible, so calculate the hash
2492 	 * entry numbers first and then proceed.
2493 	 */
2494 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2495 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2496 				  0xffffffff);
2497 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2498 				  ifs->ifs_ipf_nattable_sz);
2499 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2500 				  0xffffffff);
2501 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2502 				  ifs->ifs_ipf_nattable_sz);
2503 	} else {
2504 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2505 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2506 				  ifs->ifs_ipf_nattable_sz);
2507 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2508 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2509 				  ifs->ifs_ipf_nattable_sz);
2510 	}
2511 
2512 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2513 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2514 		return -1;
2515 	}
2516 
2517 	nat->nat_hv[0] = hv1;
2518 	nat->nat_hv[1] = hv2;
2519 
2520 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2521 
2522 	nat->nat_rev = rev;
2523 	nat->nat_ref = 1;
2524 	nat->nat_bytes[0] = 0;
2525 	nat->nat_pkts[0] = 0;
2526 	nat->nat_bytes[1] = 0;
2527 	nat->nat_pkts[1] = 0;
2528 
2529 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2530 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2531 
2532 	if (nat->nat_ifnames[1][0] !='\0') {
2533 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2534 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2535 	} else {
2536 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2537 			       LIFNAMSIZ);
2538 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2539 		nat->nat_ifps[1] = nat->nat_ifps[0];
2540 	}
2541 
2542 	nat->nat_next = ifs->ifs_nat_instances;
2543 	nat->nat_pnext = &ifs->ifs_nat_instances;
2544 	if (ifs->ifs_nat_instances)
2545 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2546 	ifs->ifs_nat_instances = nat;
2547 
2548 	natp = &ifs->ifs_nat_table[0][hv1];
2549 	if (*natp)
2550 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2551 	nat->nat_phnext[0] = natp;
2552 	nat->nat_hnext[0] = *natp;
2553 	*natp = nat;
2554 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2555 
2556 	natp = &ifs->ifs_nat_table[1][hv2];
2557 	if (*natp)
2558 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2559 	nat->nat_phnext[1] = natp;
2560 	nat->nat_hnext[1] = *natp;
2561 	*natp = nat;
2562 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2563 
2564 	fr_setnatqueue(nat, rev, ifs);
2565 
2566 	ifs->ifs_nat_stats.ns_added++;
2567 	ifs->ifs_nat_stats.ns_inuse++;
2568 	return 0;
2569 }
2570 
2571 
2572 /* ------------------------------------------------------------------------ */
2573 /* Function:    nat_icmperrorlookup                                         */
2574 /* Returns:     nat_t* - point to matching NAT structure                    */
2575 /* Parameters:  fin(I) - pointer to packet information                      */
2576 /*              dir(I) - direction of packet (in/out)                       */
2577 /*                                                                          */
2578 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2579 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2580 /* the required length.                                                     */
2581 /* ------------------------------------------------------------------------ */
2582 nat_t *nat_icmperrorlookup(fin, dir)
2583 fr_info_t *fin;
2584 int dir;
2585 {
2586 	int flags = 0, minlen;
2587 	icmphdr_t *orgicmp;
2588 	tcphdr_t *tcp = NULL;
2589 	u_short data[2];
2590 	nat_t *nat;
2591 	ip_t *oip;
2592 	u_int p;
2593 
2594 	/*
2595 	 * Does it at least have the return (basic) IP header ?
2596 	 * Only a basic IP header (no options) should be with an ICMP error
2597 	 * header.  Also, if it's not an error type, then return.
2598 	 */
2599 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2600 		return NULL;
2601 
2602 	/*
2603 	 * Check packet size
2604 	 */
2605 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2606 	minlen = IP_HL(oip) << 2;
2607 	if ((minlen < sizeof(ip_t)) ||
2608 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2609 		return NULL;
2610 	/*
2611 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2612 	 * header claimed in the encapsulated part which is of concern.  It
2613 	 * may be too big to be in this buffer but not so big that it's
2614 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2615 	 * This is possible because we don't know how big oip_hl is when we
2616 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2617 	 * all here now.
2618 	 */
2619 #ifdef  _KERNEL
2620 	{
2621 	mb_t *m;
2622 
2623 	m = fin->fin_m;
2624 # if defined(MENTAT)
2625 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2626 		return NULL;
2627 # else
2628 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2629 	    (char *)fin->fin_ip + M_LEN(m))
2630 		return NULL;
2631 # endif
2632 	}
2633 #endif
2634 
2635 	if (fin->fin_daddr != oip->ip_src.s_addr)
2636 		return NULL;
2637 
2638 	p = oip->ip_p;
2639 	if (p == IPPROTO_TCP)
2640 		flags = IPN_TCP;
2641 	else if (p == IPPROTO_UDP)
2642 		flags = IPN_UDP;
2643 	else if (p == IPPROTO_ICMP) {
2644 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2645 
2646 		/* see if this is related to an ICMP query */
2647 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2648 			data[0] = fin->fin_data[0];
2649 			data[1] = fin->fin_data[1];
2650 			fin->fin_data[0] = 0;
2651 			fin->fin_data[1] = orgicmp->icmp_id;
2652 
2653 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2654 			/*
2655 			 * NOTE : dir refers to the direction of the original
2656 			 *        ip packet. By definition the icmp error
2657 			 *        message flows in the opposite direction.
2658 			 */
2659 			if (dir == NAT_INBOUND)
2660 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2661 						   oip->ip_src);
2662 			else
2663 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2664 						    oip->ip_src);
2665 			fin->fin_data[0] = data[0];
2666 			fin->fin_data[1] = data[1];
2667 			return nat;
2668 		}
2669 	}
2670 
2671 	if (flags & IPN_TCPUDP) {
2672 		minlen += 8;		/* + 64bits of data to get ports */
2673 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2674 			return NULL;
2675 
2676 		data[0] = fin->fin_data[0];
2677 		data[1] = fin->fin_data[1];
2678 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2679 		fin->fin_data[0] = ntohs(tcp->th_dport);
2680 		fin->fin_data[1] = ntohs(tcp->th_sport);
2681 
2682 		if (dir == NAT_INBOUND) {
2683 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2684 					   oip->ip_src);
2685 		} else {
2686 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2687 					    oip->ip_src);
2688 		}
2689 		fin->fin_data[0] = data[0];
2690 		fin->fin_data[1] = data[1];
2691 		return nat;
2692 	}
2693 	if (dir == NAT_INBOUND)
2694 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2695 	else
2696 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2697 }
2698 
2699 
2700 /* ------------------------------------------------------------------------ */
2701 /* Function:    nat_icmperror                                               */
2702 /* Returns:     nat_t* - point to matching NAT structure                    */
2703 /* Parameters:  fin(I)    - pointer to packet information                   */
2704 /*              nflags(I) - NAT flags for this packet                       */
2705 /*              dir(I)    - direction of packet (in/out)                    */
2706 /*                                                                          */
2707 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2708 /* session.  This will correct both packet header data and checksums.       */
2709 /*                                                                          */
2710 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2711 /* a NAT'd ICMP packet gets correctly recognised.                           */
2712 /* ------------------------------------------------------------------------ */
2713 nat_t *nat_icmperror(fin, nflags, dir)
2714 fr_info_t *fin;
2715 u_int *nflags;
2716 int dir;
2717 {
2718 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2719 	struct in_addr in;
2720 	icmphdr_t *icmp, *orgicmp;
2721 	int dlen;
2722 	udphdr_t *udp;
2723 	tcphdr_t *tcp;
2724 	nat_t *nat;
2725 	ip_t *oip;
2726 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2727 		return NULL;
2728 
2729 	/*
2730 	 * nat_icmperrorlookup() looks up nat entry associated with the
2731 	 * offending IP packet and returns pointer to the entry, or NULL
2732 	 * if packet wasn't natted or for `defective' packets.
2733 	 */
2734 
2735 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2736 		return NULL;
2737 
2738 	sumd2 = 0;
2739 	*nflags = IPN_ICMPERR;
2740 	icmp = fin->fin_dp;
2741 	oip = (ip_t *)&icmp->icmp_ip;
2742 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
2743 	tcp = (tcphdr_t *)udp;
2744 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
2745 
2746 	/*
2747 	 * Need to adjust ICMP header to include the real IP#'s and
2748 	 * port #'s.  There are three steps required.
2749 	 *
2750 	 * Step 1
2751 	 * Fix the IP addresses in the offending IP packet and update
2752 	 * ip header checksum to compensate for the change.
2753 	 *
2754 	 * No update needed here for icmp_cksum because the ICMP checksum
2755 	 * is calculated over the complete ICMP packet, which includes the
2756 	 * changed oip IP addresses and oip->ip_sum.  These two changes
2757 	 * cancel each other out (if the delta for the IP address is x,
2758 	 * then the delta for ip_sum is minus x).
2759 	 */
2760 
2761 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
2762 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
2763 		in = nat->nat_inip;
2764 		oip->ip_src = in;
2765 	} else {
2766 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
2767 		in = nat->nat_outip;
2768 		oip->ip_dst = in;
2769 	}
2770 
2771 	sum2 = LONG_SUM(ntohl(in.s_addr));
2772 	CALC_SUMD(sum1, sum2, sumd);
2773 	fix_datacksum(&oip->ip_sum, sumd);
2774 
2775 	/*
2776 	 * Step 2
2777 	 * Perform other adjustments based on protocol of offending packet.
2778 	 */
2779 
2780 	switch (oip->ip_p) {
2781 		case IPPROTO_TCP :
2782 		case IPPROTO_UDP :
2783 
2784 			/*
2785 			* For offending TCP/UDP IP packets, translate the ports
2786 			* based on the NAT specification.
2787 			*
2788 			* Advance notice : Now it becomes complicated :-)
2789 			*
2790 			* Since the port and IP addresse fields are both part
2791 			* of the TCP/UDP checksum of the offending IP packet,
2792 			* we need to adjust that checksum as well.
2793 			*
2794 			* To further complicate things, the TCP/UDP checksum
2795 			* may not be present.  We must check to see if the
2796 			* length of the data portion is big enough to hold
2797 			* the checksum.  In the UDP case, a test to determine
2798 			* if the checksum is even set is also required.
2799 			*
2800 			* Any changes to an IP address, port or checksum within
2801 			* the ICMP packet requires a change to icmp_cksum.
2802 			*
2803 			* Be extremely careful here ... The change is dependent
2804 			* upon whether or not the TCP/UPD checksum is present.
2805 			*
2806 			* If TCP/UPD checksum is present, the icmp_cksum must
2807 			* compensate for checksum modification resulting from
2808 			* IP address change only.  Port change and resulting
2809 			* data checksum adjustments cancel each other out.
2810 			*
2811 			* If TCP/UDP checksum is not present, icmp_cksum must
2812 			* compensate for port change only.  The IP address
2813 			* change does not modify anything else in this case.
2814 			*/
2815 
2816 			psum1 = 0;
2817 			psum2 = 0;
2818 			psumd = 0;
2819 
2820 			if ((tcp->th_dport == nat->nat_oport) &&
2821 			    (tcp->th_sport != nat->nat_inport)) {
2822 
2823 				/*
2824 				 * Translate the source port.
2825 				 */
2826 
2827 				psum1 = ntohs(tcp->th_sport);
2828 				psum2 = ntohs(nat->nat_inport);
2829 				tcp->th_sport = nat->nat_inport;
2830 
2831 			} else if ((tcp->th_sport == nat->nat_oport) &&
2832 				    (tcp->th_dport != nat->nat_outport)) {
2833 
2834 				/*
2835 				 * Translate the destination port.
2836 				 */
2837 
2838 				psum1 = ntohs(tcp->th_dport);
2839 				psum2 = ntohs(nat->nat_outport);
2840 				tcp->th_dport = nat->nat_outport;
2841 			}
2842 
2843 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
2844 
2845 				/*
2846 				 * TCP checksum present.
2847 				 *
2848 				 * Adjust data checksum and icmp checksum to
2849 				 * compensate for any IP address change.
2850 				 */
2851 
2852 				sum1 = ntohs(tcp->th_sum);
2853 				fix_datacksum(&tcp->th_sum, sumd);
2854 				sum2 = ntohs(tcp->th_sum);
2855 				sumd2 = sumd << 1;
2856 				CALC_SUMD(sum1, sum2, sumd);
2857 				sumd2 += sumd;
2858 
2859 				/*
2860 				 * Also make data checksum adjustment to
2861 				 * compensate for any port change.
2862 				 */
2863 
2864 				if (psum1 != psum2) {
2865 					CALC_SUMD(psum1, psum2, psumd);
2866 					fix_datacksum(&tcp->th_sum, psumd);
2867 				}
2868 
2869 			} else if ((oip->ip_p == IPPROTO_UDP) &&
2870 				   (dlen >= 8) && (udp->uh_sum != 0)) {
2871 
2872 				/*
2873 				 * The UDP checksum is present and set.
2874 				 *
2875 				 * Adjust data checksum and icmp checksum to
2876 				 * compensate for any IP address change.
2877 				 */
2878 
2879 				sum1 = ntohs(udp->uh_sum);
2880 				fix_datacksum(&udp->uh_sum, sumd);
2881 				sum2 = ntohs(udp->uh_sum);
2882 				sumd2 = sumd << 1;
2883 				CALC_SUMD(sum1, sum2, sumd);
2884 				sumd2 += sumd;
2885 
2886 				/*
2887 				 * Also make data checksum adjustment to
2888 				 * compensate for any port change.
2889 				 */
2890 
2891 				if (psum1 != psum2) {
2892 					CALC_SUMD(psum1, psum2, psumd);
2893 					fix_datacksum(&udp->uh_sum, psumd);
2894 				}
2895 
2896 			} else {
2897 
2898 				/*
2899 				 * Data checksum was not present.
2900 				 *
2901 				 * Compensate for any port change.
2902 				 */
2903 
2904 				CALC_SUMD(psum2, psum1, psumd);
2905 				sumd2 += psumd;
2906 			}
2907 			break;
2908 
2909 		case IPPROTO_ICMP :
2910 
2911 			orgicmp = (icmphdr_t *)udp;
2912 
2913 			if ((nat->nat_dir == NAT_OUTBOUND) &&
2914 			    (orgicmp->icmp_id != nat->nat_inport) &&
2915 			    (dlen >= 8)) {
2916 
2917 				/*
2918 				 * Fix ICMP checksum (of the offening ICMP
2919 				 * query packet) to compensate the change
2920 				 * in the ICMP id of the offending ICMP
2921 				 * packet.
2922 				 *
2923 				 * Since you modify orgicmp->icmp_id with
2924 				 * a delta (say x) and you compensate that
2925 				 * in origicmp->icmp_cksum with a delta
2926 				 * minus x, you don't have to adjust the
2927 				 * overall icmp->icmp_cksum
2928 				 */
2929 
2930 				sum1 = ntohs(orgicmp->icmp_id);
2931 				sum2 = ntohs(nat->nat_inport);
2932 				CALC_SUMD(sum1, sum2, sumd);
2933 				orgicmp->icmp_id = nat->nat_inport;
2934 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
2935 
2936 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
2937 
2938 			break;
2939 
2940 		default :
2941 
2942 			break;
2943 
2944 	} /* switch (oip->ip_p) */
2945 
2946 	/*
2947 	 * Step 3
2948 	 * Make the adjustments to icmp checksum.
2949 	 */
2950 
2951 	if (sumd2 != 0) {
2952 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2953 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2954 		fix_incksum(&icmp->icmp_cksum, sumd2);
2955 	}
2956 	return nat;
2957 }
2958 
2959 
2960 /*
2961  * NB: these lookups don't lock access to the list, it assumed that it has
2962  * already been done!
2963  */
2964 
2965 /* ------------------------------------------------------------------------ */
2966 /* Function:    nat_inlookup                                                */
2967 /* Returns:     nat_t* - NULL == no match,                                  */
2968 /*                       else pointer to matching NAT entry                 */
2969 /* Parameters:  fin(I)    - pointer to packet information                   */
2970 /*              flags(I)  - NAT flags for this packet                       */
2971 /*              p(I)      - protocol for this packet                        */
2972 /*              src(I)    - source IP address                               */
2973 /*              mapdst(I) - destination IP address                          */
2974 /*                                                                          */
2975 /* Lookup a nat entry based on the mapped destination ip address/port and   */
2976 /* real source address/port.  We use this lookup when receiving a packet,   */
2977 /* we're looking for a table entry, based on the destination address.       */
2978 /*                                                                          */
2979 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
2980 /*                                                                          */
2981 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
2982 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
2983 /*                                                                          */
2984 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
2985 /*            the packet is of said protocol                                */
2986 /* ------------------------------------------------------------------------ */
2987 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
2988 fr_info_t *fin;
2989 u_int flags, p;
2990 struct in_addr src , mapdst;
2991 {
2992 	u_short sport, dport;
2993 	ipnat_t *ipn;
2994 	u_int sflags;
2995 	nat_t *nat;
2996 	int nflags;
2997 	u_32_t dst;
2998 	void *ifp;
2999 	u_int hv;
3000 	ipf_stack_t *ifs = fin->fin_ifs;
3001 
3002 	if (fin != NULL)
3003 		ifp = fin->fin_ifp;
3004 	else
3005 		ifp = NULL;
3006 	sport = 0;
3007 	dport = 0;
3008 	dst = mapdst.s_addr;
3009 	sflags = flags & NAT_TCPUDPICMP;
3010 
3011 	switch (p)
3012 	{
3013 	case IPPROTO_TCP :
3014 	case IPPROTO_UDP :
3015 		sport = htons(fin->fin_data[0]);
3016 		dport = htons(fin->fin_data[1]);
3017 		break;
3018 	case IPPROTO_ICMP :
3019 		if (flags & IPN_ICMPERR)
3020 			sport = fin->fin_data[1];
3021 		else
3022 			dport = fin->fin_data[1];
3023 		break;
3024 	default :
3025 		break;
3026 	}
3027 
3028 
3029 	if ((flags & SI_WILDP) != 0)
3030 		goto find_in_wild_ports;
3031 
3032 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3033 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3034 	nat = ifs->ifs_nat_table[1][hv];
3035 	for (; nat; nat = nat->nat_hnext[1]) {
3036 		if (nat->nat_ifps[0] != NULL) {
3037 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3038 				continue;
3039 		} else if (ifp != NULL)
3040 			nat->nat_ifps[0] = ifp;
3041 
3042 		nflags = nat->nat_flags;
3043 
3044 		if (nat->nat_oip.s_addr == src.s_addr &&
3045 		    nat->nat_outip.s_addr == dst &&
3046 		    (((p == 0) &&
3047 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3048 		     || (p == nat->nat_p))) {
3049 			switch (p)
3050 			{
3051 #if 0
3052 			case IPPROTO_GRE :
3053 				if (nat->nat_call[1] != fin->fin_data[0])
3054 					continue;
3055 				break;
3056 #endif
3057 			case IPPROTO_ICMP :
3058 				if ((flags & IPN_ICMPERR) != 0) {
3059 					if (nat->nat_outport != sport)
3060 						continue;
3061 				} else {
3062 					if (nat->nat_outport != dport)
3063 						continue;
3064 				}
3065 				break;
3066 			case IPPROTO_TCP :
3067 			case IPPROTO_UDP :
3068 				if (nat->nat_oport != sport)
3069 					continue;
3070 				if (nat->nat_outport != dport)
3071 					continue;
3072 				break;
3073 			default :
3074 				break;
3075 			}
3076 
3077 			ipn = nat->nat_ptr;
3078 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3079 				if (appr_match(fin, nat) != 0)
3080 					continue;
3081 			return nat;
3082 		}
3083 	}
3084 
3085 	/*
3086 	 * So if we didn't find it but there are wildcard members in the hash
3087 	 * table, go back and look for them.  We do this search and update here
3088 	 * because it is modifying the NAT table and we want to do this only
3089 	 * for the first packet that matches.  The exception, of course, is
3090 	 * for "dummy" (FI_IGNORE) lookups.
3091 	 */
3092 find_in_wild_ports:
3093 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3094 		return NULL;
3095 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3096 		return NULL;
3097 
3098 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3099 
3100 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3101 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3102 
3103 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3104 
3105 	nat = ifs->ifs_nat_table[1][hv];
3106 	for (; nat; nat = nat->nat_hnext[1]) {
3107 		if (nat->nat_ifps[0] != NULL) {
3108 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3109 				continue;
3110 		} else if (ifp != NULL)
3111 			nat->nat_ifps[0] = ifp;
3112 
3113 		if (nat->nat_p != fin->fin_p)
3114 			continue;
3115 		if (nat->nat_oip.s_addr != src.s_addr ||
3116 		    nat->nat_outip.s_addr != dst)
3117 			continue;
3118 
3119 		nflags = nat->nat_flags;
3120 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3121 			continue;
3122 
3123 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3124 			       NAT_INBOUND) == 1) {
3125 			if ((fin->fin_flx & FI_IGNORE) != 0)
3126 				break;
3127 			if ((nflags & SI_CLONE) != 0) {
3128 				nat = fr_natclone(fin, nat);
3129 				if (nat == NULL)
3130 					break;
3131 			} else {
3132 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3133 				ifs->ifs_nat_stats.ns_wilds--;
3134 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3135 			}
3136 			nat->nat_oport = sport;
3137 			nat->nat_outport = dport;
3138 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3139 			nat_tabmove(nat, ifs);
3140 			break;
3141 		}
3142 	}
3143 
3144 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3145 
3146 	return nat;
3147 }
3148 
3149 
3150 /* ------------------------------------------------------------------------ */
3151 /* Function:    nat_tabmove                                                 */
3152 /* Returns:     Nil                                                         */
3153 /* Parameters:  nat(I) - pointer to NAT structure                           */
3154 /* Write Lock:  ipf_nat                                                     */
3155 /*                                                                          */
3156 /* This function is only called for TCP/UDP NAT table entries where the     */
3157 /* original was placed in the table without hashing on the ports and we now */
3158 /* want to include hashing on port numbers.                                 */
3159 /* ------------------------------------------------------------------------ */
3160 static void nat_tabmove(nat, ifs)
3161 nat_t *nat;
3162 ipf_stack_t *ifs;
3163 {
3164 	nat_t **natp;
3165 	u_int hv;
3166 
3167 	if (nat->nat_flags & SI_CLONE)
3168 		return;
3169 
3170 	/*
3171 	 * Remove the NAT entry from the old location
3172 	 */
3173 	if (nat->nat_hnext[0])
3174 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3175 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3176 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3177 
3178 	if (nat->nat_hnext[1])
3179 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3180 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3181 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3182 
3183 	/*
3184 	 * Add into the NAT table in the new position
3185 	 */
3186 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3187 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3188 			 ifs->ifs_ipf_nattable_sz);
3189 	nat->nat_hv[0] = hv;
3190 	natp = &ifs->ifs_nat_table[0][hv];
3191 	if (*natp)
3192 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3193 	nat->nat_phnext[0] = natp;
3194 	nat->nat_hnext[0] = *natp;
3195 	*natp = nat;
3196 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3197 
3198 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3199 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3200 			 ifs->ifs_ipf_nattable_sz);
3201 	nat->nat_hv[1] = hv;
3202 	natp = &ifs->ifs_nat_table[1][hv];
3203 	if (*natp)
3204 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3205 	nat->nat_phnext[1] = natp;
3206 	nat->nat_hnext[1] = *natp;
3207 	*natp = nat;
3208 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3209 }
3210 
3211 
3212 /* ------------------------------------------------------------------------ */
3213 /* Function:    nat_outlookup                                               */
3214 /* Returns:     nat_t* - NULL == no match,                                  */
3215 /*                       else pointer to matching NAT entry                 */
3216 /* Parameters:  fin(I)   - pointer to packet information                    */
3217 /*              flags(I) - NAT flags for this packet                        */
3218 /*              p(I)     - protocol for this packet                         */
3219 /*              src(I)   - source IP address                                */
3220 /*              dst(I)   - destination IP address                           */
3221 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3222 /*                                                                          */
3223 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3224 /* destination address/port.  We use this lookup when sending a packet out, */
3225 /* we're looking for a table entry, based on the source address.            */
3226 /*                                                                          */
3227 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3228 /*                                                                          */
3229 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3230 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3231 /*                                                                          */
3232 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3233 /*            the packet is of said protocol                                */
3234 /* ------------------------------------------------------------------------ */
3235 nat_t *nat_outlookup(fin, flags, p, src, dst)
3236 fr_info_t *fin;
3237 u_int flags, p;
3238 struct in_addr src , dst;
3239 {
3240 	u_short sport, dport;
3241 	u_int sflags;
3242 	ipnat_t *ipn;
3243 	u_32_t srcip;
3244 	nat_t *nat;
3245 	int nflags;
3246 	void *ifp;
3247 	u_int hv;
3248 	ipf_stack_t *ifs = fin->fin_ifs;
3249 
3250 	ifp = fin->fin_ifp;
3251 
3252 	srcip = src.s_addr;
3253 	sflags = flags & IPN_TCPUDPICMP;
3254 	sport = 0;
3255 	dport = 0;
3256 
3257 	switch (p)
3258 	{
3259 	case IPPROTO_TCP :
3260 	case IPPROTO_UDP :
3261 		sport = htons(fin->fin_data[0]);
3262 		dport = htons(fin->fin_data[1]);
3263 		break;
3264 	case IPPROTO_ICMP :
3265 		if (flags & IPN_ICMPERR)
3266 			sport = fin->fin_data[1];
3267 		else
3268 			dport = fin->fin_data[1];
3269 		break;
3270 	default :
3271 		break;
3272 	}
3273 
3274 	if ((flags & SI_WILDP) != 0)
3275 		goto find_out_wild_ports;
3276 
3277 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3278 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3279 	nat = ifs->ifs_nat_table[0][hv];
3280 	for (; nat; nat = nat->nat_hnext[0]) {
3281 		if (nat->nat_ifps[1] != NULL) {
3282 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3283 				continue;
3284 		} else if (ifp != NULL)
3285 			nat->nat_ifps[1] = ifp;
3286 
3287 		nflags = nat->nat_flags;
3288 
3289 		if (nat->nat_inip.s_addr == srcip &&
3290 		    nat->nat_oip.s_addr == dst.s_addr &&
3291 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3292 		     || (p == nat->nat_p))) {
3293 			switch (p)
3294 			{
3295 #if 0
3296 			case IPPROTO_GRE :
3297 				if (nat->nat_call[1] != fin->fin_data[0])
3298 					continue;
3299 				break;
3300 #endif
3301 			case IPPROTO_TCP :
3302 			case IPPROTO_UDP :
3303 				if (nat->nat_oport != dport)
3304 					continue;
3305 				if (nat->nat_inport != sport)
3306 					continue;
3307 				break;
3308 			default :
3309 				break;
3310 			}
3311 
3312 			ipn = nat->nat_ptr;
3313 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3314 				if (appr_match(fin, nat) != 0)
3315 					continue;
3316 			return nat;
3317 		}
3318 	}
3319 
3320 	/*
3321 	 * So if we didn't find it but there are wildcard members in the hash
3322 	 * table, go back and look for them.  We do this search and update here
3323 	 * because it is modifying the NAT table and we want to do this only
3324 	 * for the first packet that matches.  The exception, of course, is
3325 	 * for "dummy" (FI_IGNORE) lookups.
3326 	 */
3327 find_out_wild_ports:
3328 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3329 		return NULL;
3330 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3331 		return NULL;
3332 
3333 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3334 
3335 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3336 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3337 
3338 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3339 
3340 	nat = ifs->ifs_nat_table[0][hv];
3341 	for (; nat; nat = nat->nat_hnext[0]) {
3342 		if (nat->nat_ifps[1] != NULL) {
3343 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3344 				continue;
3345 		} else if (ifp != NULL)
3346 			nat->nat_ifps[1] = ifp;
3347 
3348 		if (nat->nat_p != fin->fin_p)
3349 			continue;
3350 		if ((nat->nat_inip.s_addr != srcip) ||
3351 		    (nat->nat_oip.s_addr != dst.s_addr))
3352 			continue;
3353 
3354 		nflags = nat->nat_flags;
3355 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3356 			continue;
3357 
3358 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3359 			       NAT_OUTBOUND) == 1) {
3360 			if ((fin->fin_flx & FI_IGNORE) != 0)
3361 				break;
3362 			if ((nflags & SI_CLONE) != 0) {
3363 				nat = fr_natclone(fin, nat);
3364 				if (nat == NULL)
3365 					break;
3366 			} else {
3367 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3368 				ifs->ifs_nat_stats.ns_wilds--;
3369 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3370 			}
3371 			nat->nat_inport = sport;
3372 			nat->nat_oport = dport;
3373 			if (nat->nat_outport == 0)
3374 				nat->nat_outport = sport;
3375 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3376 			nat_tabmove(nat, ifs);
3377 			break;
3378 		}
3379 	}
3380 
3381 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3382 
3383 	return nat;
3384 }
3385 
3386 
3387 /* ------------------------------------------------------------------------ */
3388 /* Function:    nat_lookupredir                                             */
3389 /* Returns:     nat_t* - NULL == no match,                                  */
3390 /*                       else pointer to matching NAT entry                 */
3391 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3392 /*                      entry for.                                          */
3393 /*                                                                          */
3394 /* Lookup the NAT tables to search for a matching redirect                  */
3395 /* ------------------------------------------------------------------------ */
3396 nat_t *nat_lookupredir(np, ifs)
3397 natlookup_t *np;
3398 ipf_stack_t *ifs;
3399 {
3400 	fr_info_t fi;
3401 	nat_t *nat;
3402 
3403 	bzero((char *)&fi, sizeof(fi));
3404 	if (np->nl_flags & IPN_IN) {
3405 		fi.fin_data[0] = ntohs(np->nl_realport);
3406 		fi.fin_data[1] = ntohs(np->nl_outport);
3407 	} else {
3408 		fi.fin_data[0] = ntohs(np->nl_inport);
3409 		fi.fin_data[1] = ntohs(np->nl_outport);
3410 	}
3411 	if (np->nl_flags & IPN_TCP)
3412 		fi.fin_p = IPPROTO_TCP;
3413 	else if (np->nl_flags & IPN_UDP)
3414 		fi.fin_p = IPPROTO_UDP;
3415 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3416 		fi.fin_p = IPPROTO_ICMP;
3417 
3418 	fi.fin_ifs = ifs;
3419 	/*
3420 	 * We can do two sorts of lookups:
3421 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3422 	 * - default: we have the `in' and `out' address, look for `real'.
3423 	 */
3424 	if (np->nl_flags & IPN_IN) {
3425 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3426 					np->nl_realip, np->nl_outip))) {
3427 			np->nl_inip = nat->nat_inip;
3428 			np->nl_inport = nat->nat_inport;
3429 		}
3430 	} else {
3431 		/*
3432 		 * If nl_inip is non null, this is a lookup based on the real
3433 		 * ip address. Else, we use the fake.
3434 		 */
3435 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3436 					 np->nl_inip, np->nl_outip))) {
3437 
3438 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3439 				fr_info_t fin;
3440 				bzero((char *)&fin, sizeof(fin));
3441 				fin.fin_p = nat->nat_p;
3442 				fin.fin_data[0] = ntohs(nat->nat_outport);
3443 				fin.fin_data[1] = ntohs(nat->nat_oport);
3444 				fin.fin_ifs = ifs;
3445 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3446 						 nat->nat_outip,
3447 						 nat->nat_oip) != NULL) {
3448 					np->nl_flags &= ~IPN_FINDFORWARD;
3449 				}
3450 			}
3451 
3452 			np->nl_realip = nat->nat_outip;
3453 			np->nl_realport = nat->nat_outport;
3454 		}
3455  	}
3456 
3457 	return nat;
3458 }
3459 
3460 
3461 /* ------------------------------------------------------------------------ */
3462 /* Function:    nat_match                                                   */
3463 /* Returns:     int - 0 == no match, 1 == match                             */
3464 /* Parameters:  fin(I)   - pointer to packet information                    */
3465 /*              np(I)    - pointer to NAT rule                              */
3466 /*                                                                          */
3467 /* Pull the matching of a packet against a NAT rule out of that complex     */
3468 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3469 /* ------------------------------------------------------------------------ */
3470 static int nat_match(fin, np)
3471 fr_info_t *fin;
3472 ipnat_t *np;
3473 {
3474 	frtuc_t *ft;
3475 
3476 	if (fin->fin_v != 4)
3477 		return 0;
3478 
3479 	if (np->in_p && fin->fin_p != np->in_p)
3480 		return 0;
3481 
3482 	if (fin->fin_out) {
3483 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3484 			return 0;
3485 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3486 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3487 			return 0;
3488 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3489 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3490 			return 0;
3491 	} else {
3492 		if (!(np->in_redir & NAT_REDIRECT))
3493 			return 0;
3494 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3495 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3496 			return 0;
3497 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3498 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3499 			return 0;
3500 	}
3501 
3502 	ft = &np->in_tuc;
3503 	if (!(fin->fin_flx & FI_TCPUDP) ||
3504 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3505 		if (ft->ftu_scmp || ft->ftu_dcmp)
3506 			return 0;
3507 		return 1;
3508 	}
3509 
3510 	return fr_tcpudpchk(fin, ft);
3511 }
3512 
3513 
3514 /* ------------------------------------------------------------------------ */
3515 /* Function:    nat_update                                                  */
3516 /* Returns:     Nil                                                         */
3517 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3518 /*              np(I)     - pointer to NAT rule                             */
3519 /*                                                                          */
3520 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3521 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3522 /* ------------------------------------------------------------------------ */
3523 void nat_update(fin, nat, np)
3524 fr_info_t *fin;
3525 nat_t *nat;
3526 ipnat_t *np;
3527 {
3528 	ipftq_t *ifq, *ifq2;
3529 	ipftqent_t *tqe;
3530 	ipf_stack_t *ifs = fin->fin_ifs;
3531 
3532 	MUTEX_ENTER(&nat->nat_lock);
3533 	tqe = &nat->nat_tqe;
3534 	ifq = tqe->tqe_ifq;
3535 
3536 	/*
3537 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3538 	 * TCP, however, if it is TCP and there is no rule timeout set,
3539 	 * then do not update the timeout here.
3540 	 */
3541 	if (np != NULL)
3542 		ifq2 = np->in_tqehead[fin->fin_rev];
3543 	else
3544 		ifq2 = NULL;
3545 
3546 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3547 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3548 	} else {
3549 		if (ifq2 == NULL) {
3550 			if (nat->nat_p == IPPROTO_UDP)
3551 				ifq2 = &ifs->ifs_nat_udptq;
3552 			else if (nat->nat_p == IPPROTO_ICMP)
3553 				ifq2 = &ifs->ifs_nat_icmptq;
3554 			else
3555 				ifq2 = &ifs->ifs_nat_iptq;
3556 		}
3557 
3558 		fr_movequeue(tqe, ifq, ifq2, ifs);
3559 	}
3560 	MUTEX_EXIT(&nat->nat_lock);
3561 }
3562 
3563 
3564 /* ------------------------------------------------------------------------ */
3565 /* Function:    fr_checknatout                                              */
3566 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3567 /*                     0 == no packet translation occurred,                 */
3568 /*                     1 == packet was successfully translated.             */
3569 /* Parameters:  fin(I)   - pointer to packet information                    */
3570 /*              passp(I) - pointer to filtering result flags                */
3571 /*                                                                          */
3572 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3573 /* first checked to see if they match an existing entry (if an error),      */
3574 /* otherwise a search of the current NAT table is made.  If neither results */
3575 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3576 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3577 /* packet header(s) as required.                                            */
3578 /* ------------------------------------------------------------------------ */
3579 int fr_checknatout(fin, passp)
3580 fr_info_t *fin;
3581 u_32_t *passp;
3582 {
3583 	struct ifnet *ifp, *sifp;
3584 	icmphdr_t *icmp = NULL;
3585 	tcphdr_t *tcp = NULL;
3586 	int rval, natfailed;
3587 	ipnat_t *np = NULL;
3588 	u_int nflags = 0;
3589 	u_32_t ipa, iph;
3590 	int natadd = 1;
3591 	frentry_t *fr;
3592 	nat_t *nat;
3593 	ipf_stack_t *ifs = fin->fin_ifs;
3594 
3595 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3596 		return 0;
3597 
3598 	natfailed = 0;
3599 	fr = fin->fin_fr;
3600 	sifp = fin->fin_ifp;
3601 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3602 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3603 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3604 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3605 	ifp = fin->fin_ifp;
3606 
3607 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3608 		switch (fin->fin_p)
3609 		{
3610 		case IPPROTO_TCP :
3611 			nflags = IPN_TCP;
3612 			break;
3613 		case IPPROTO_UDP :
3614 			nflags = IPN_UDP;
3615 			break;
3616 		case IPPROTO_ICMP :
3617 			icmp = fin->fin_dp;
3618 
3619 			/*
3620 			 * This is an incoming packet, so the destination is
3621 			 * the icmp_id and the source port equals 0
3622 			 */
3623 			if (nat_icmpquerytype4(icmp->icmp_type))
3624 				nflags = IPN_ICMPQUERY;
3625 			break;
3626 		default :
3627 			break;
3628 		}
3629 
3630 		if ((nflags & IPN_TCPUDP))
3631 			tcp = fin->fin_dp;
3632 	}
3633 
3634 	ipa = fin->fin_saddr;
3635 
3636 	READ_ENTER(&ifs->ifs_ipf_nat);
3637 
3638 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3639 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3640 		/*EMPTY*/;
3641 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3642 		natadd = 0;
3643 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3644 				      fin->fin_src, fin->fin_dst))) {
3645 		nflags = nat->nat_flags;
3646 	} else {
3647 		u_32_t hv, msk, nmsk;
3648 
3649 		/*
3650 		 * If there is no current entry in the nat table for this IP#,
3651 		 * create one for it (if there is a matching rule).
3652 		 */
3653 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3654 		msk = 0xffffffff;
3655 		nmsk = ifs->ifs_nat_masks;
3656 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3657 maskloop:
3658 		iph = ipa & htonl(msk);
3659 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3660 		for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext)
3661 		{
3662 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3663 				continue;
3664 			if (np->in_v != fin->fin_v)
3665 				continue;
3666 			if (np->in_p && (np->in_p != fin->fin_p))
3667 				continue;
3668 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3669 				continue;
3670 			if (np->in_flags & IPN_FILTER) {
3671 				if (!nat_match(fin, np))
3672 					continue;
3673 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3674 				continue;
3675 
3676 			if ((fr != NULL) &&
3677 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3678 				continue;
3679 
3680 			if (*np->in_plabel != '\0') {
3681 				if (((np->in_flags & IPN_FILTER) == 0) &&
3682 				    (np->in_dport != tcp->th_dport))
3683 					continue;
3684 				if (appr_ok(fin, tcp, np) == 0)
3685 					continue;
3686 			}
3687 
3688 			if ((nat = nat_new(fin, np, NULL, nflags,
3689 					   NAT_OUTBOUND))) {
3690 				np->in_hits++;
3691 				break;
3692 			} else
3693 				natfailed = -1;
3694 		}
3695 		if ((np == NULL) && (nmsk != 0)) {
3696 			while (nmsk) {
3697 				msk <<= 1;
3698 				if (nmsk & 0x80000000)
3699 					break;
3700 				nmsk <<= 1;
3701 			}
3702 			if (nmsk != 0) {
3703 				nmsk <<= 1;
3704 				goto maskloop;
3705 			}
3706 		}
3707 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3708 	}
3709 
3710 	if (nat != NULL) {
3711 		rval = fr_natout(fin, nat, natadd, nflags);
3712 		if (rval == 1) {
3713 			MUTEX_ENTER(&nat->nat_lock);
3714 			nat->nat_ref++;
3715 			MUTEX_EXIT(&nat->nat_lock);
3716 			nat->nat_touched = ifs->ifs_fr_ticks;
3717 			fin->fin_nat = nat;
3718 		}
3719 	} else
3720 		rval = natfailed;
3721 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3722 
3723 	if (rval == -1) {
3724 		if (passp != NULL)
3725 			*passp = FR_BLOCK;
3726 		fin->fin_flx |= FI_BADNAT;
3727 	}
3728 	fin->fin_ifp = sifp;
3729 	return rval;
3730 }
3731 
3732 /* ------------------------------------------------------------------------ */
3733 /* Function:    fr_natout                                                   */
3734 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3735 /*                     1 == packet was successfully translated.             */
3736 /* Parameters:  fin(I)    - pointer to packet information                   */
3737 /*              nat(I)    - pointer to NAT structure                        */
3738 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3739 /*              nflags(I) - NAT flags set for this packet                   */
3740 /*                                                                          */
3741 /* Translate a packet coming "out" on an interface.                         */
3742 /* ------------------------------------------------------------------------ */
3743 int fr_natout(fin, nat, natadd, nflags)
3744 fr_info_t *fin;
3745 nat_t *nat;
3746 int natadd;
3747 u_32_t nflags;
3748 {
3749 	icmphdr_t *icmp;
3750 	u_short *csump;
3751 	u_32_t sumd;
3752 	tcphdr_t *tcp;
3753 	ipnat_t *np;
3754 	int i;
3755 	ipf_stack_t *ifs = fin->fin_ifs;
3756 
3757 #if SOLARIS && defined(_KERNEL)
3758 	net_data_t net_data_p;
3759 	if (fin->fin_v == 4)
3760 		net_data_p = ifs->ifs_ipf_ipv4;
3761 	else
3762 		net_data_p = ifs->ifs_ipf_ipv6;
3763 #endif
3764 
3765 	tcp = NULL;
3766 	icmp = NULL;
3767 	csump = NULL;
3768 	np = nat->nat_ptr;
3769 
3770 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
3771 		(void) fr_nat_newfrag(fin, 0, nat);
3772 
3773 	MUTEX_ENTER(&nat->nat_lock);
3774 	nat->nat_bytes[1] += fin->fin_plen;
3775 	nat->nat_pkts[1]++;
3776 	MUTEX_EXIT(&nat->nat_lock);
3777 
3778 	/*
3779 	 * Fix up checksums, not by recalculating them, but
3780 	 * simply computing adjustments.
3781 	 * This is only done for STREAMS based IP implementations where the
3782 	 * checksum has already been calculated by IP.  In all other cases,
3783 	 * IPFilter is called before the checksum needs calculating so there
3784 	 * is no call to modify whatever is in the header now.
3785 	 */
3786 	ASSERT(fin->fin_m != NULL);
3787 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
3788 		if (nflags == IPN_ICMPERR) {
3789 			u_32_t s1, s2;
3790 
3791 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3792 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3793 			CALC_SUMD(s1, s2, sumd);
3794 
3795 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
3796 		}
3797 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3798     defined(linux) || defined(BRIDGE_IPF)
3799 		else {
3800 			/*
3801 			 * Strictly speaking, this isn't necessary on BSD
3802 			 * kernels because they do checksum calculation after
3803 			 * this code has run BUT if ipfilter is being used
3804 			 * to do NAT as a bridge, that code doesn't exist.
3805 			 */
3806 			if (nat->nat_dir == NAT_OUTBOUND)
3807 				fix_outcksum(&fin->fin_ip->ip_sum,
3808 					    nat->nat_ipsumd);
3809 			else
3810 				fix_incksum(&fin->fin_ip->ip_sum,
3811 				 	   nat->nat_ipsumd);
3812 		}
3813 #endif
3814 	}
3815 
3816 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3817 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3818 			tcp = fin->fin_dp;
3819 
3820 			tcp->th_sport = nat->nat_outport;
3821 			fin->fin_data[0] = ntohs(nat->nat_outport);
3822 		}
3823 
3824 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3825 			icmp = fin->fin_dp;
3826 			icmp->icmp_id = nat->nat_outport;
3827 		}
3828 
3829 		csump = nat_proto(fin, nat, nflags);
3830 	}
3831 
3832 	fin->fin_ip->ip_src = nat->nat_outip;
3833 
3834 	nat_update(fin, nat, np);
3835 
3836 	/*
3837 	 * The above comments do not hold for layer 4 (or higher) checksums...
3838 	 */
3839 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
3840 		if (nflags & IPN_TCPUDP &&
3841 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
3842 			sumd = nat->nat_sumd[1];
3843 		else
3844 			sumd = nat->nat_sumd[0];
3845 
3846 		if (nat->nat_dir == NAT_OUTBOUND)
3847 			fix_outcksum(csump, sumd);
3848 		else
3849 			fix_incksum(csump, sumd);
3850 	}
3851 #ifdef	IPFILTER_SYNC
3852 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3853 #endif
3854 	/* ------------------------------------------------------------- */
3855 	/* A few quick notes:						 */
3856 	/*	Following are test conditions prior to calling the 	 */
3857 	/*	appr_check routine.					 */
3858 	/*								 */
3859 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3860 	/*	with a redirect rule, we attempt to match the packet's	 */
3861 	/*	source port against in_dport, otherwise	we'd compare the */
3862 	/*	packet's destination.			 		 */
3863 	/* ------------------------------------------------------------- */
3864 	if ((np != NULL) && (np->in_apr != NULL)) {
3865 		i = appr_check(fin, nat);
3866 		if (i == 0)
3867 			i = 1;
3868 	} else
3869 		i = 1;
3870 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]);
3871 	fin->fin_flx |= FI_NATED;
3872 	return i;
3873 }
3874 
3875 
3876 /* ------------------------------------------------------------------------ */
3877 /* Function:    fr_checknatin                                               */
3878 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3879 /*                     0 == no packet translation occurred,                 */
3880 /*                     1 == packet was successfully translated.             */
3881 /* Parameters:  fin(I)   - pointer to packet information                    */
3882 /*              passp(I) - pointer to filtering result flags                */
3883 /*                                                                          */
3884 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
3885 /* first checked to see if they match an existing entry (if an error),      */
3886 /* otherwise a search of the current NAT table is made.  If neither results */
3887 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3888 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3889 /* packet header(s) as required.                                            */
3890 /* ------------------------------------------------------------------------ */
3891 int fr_checknatin(fin, passp)
3892 fr_info_t *fin;
3893 u_32_t *passp;
3894 {
3895 	u_int nflags, natadd;
3896 	int rval, natfailed;
3897 	struct ifnet *ifp;
3898 	struct in_addr in;
3899 	icmphdr_t *icmp;
3900 	tcphdr_t *tcp;
3901 	u_short dport;
3902 	ipnat_t *np;
3903 	nat_t *nat;
3904 	u_32_t iph;
3905 	ipf_stack_t *ifs = fin->fin_ifs;
3906 
3907 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3908 		return 0;
3909 
3910 	tcp = NULL;
3911 	icmp = NULL;
3912 	dport = 0;
3913 	natadd = 1;
3914 	nflags = 0;
3915 	natfailed = 0;
3916 	ifp = fin->fin_ifp;
3917 
3918 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3919 		switch (fin->fin_p)
3920 		{
3921 		case IPPROTO_TCP :
3922 			nflags = IPN_TCP;
3923 			break;
3924 		case IPPROTO_UDP :
3925 			nflags = IPN_UDP;
3926 			break;
3927 		case IPPROTO_ICMP :
3928 			icmp = fin->fin_dp;
3929 
3930 			/*
3931 			 * This is an incoming packet, so the destination is
3932 			 * the icmp_id and the source port equals 0
3933 			 */
3934 			if (nat_icmpquerytype4(icmp->icmp_type)) {
3935 				nflags = IPN_ICMPQUERY;
3936 				dport = icmp->icmp_id;
3937 			} break;
3938 		default :
3939 			break;
3940 		}
3941 
3942 		if ((nflags & IPN_TCPUDP)) {
3943 			tcp = fin->fin_dp;
3944 			dport = tcp->th_dport;
3945 		}
3946 	}
3947 
3948 	in = fin->fin_dst;
3949 
3950 	READ_ENTER(&ifs->ifs_ipf_nat);
3951 
3952 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3953 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
3954 		/*EMPTY*/;
3955 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3956 		natadd = 0;
3957 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3958 				     fin->fin_src, in))) {
3959 		nflags = nat->nat_flags;
3960 	} else {
3961 		u_32_t hv, msk, rmsk;
3962 
3963 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3964 		rmsk = ifs->ifs_rdr_masks;
3965 		msk = 0xffffffff;
3966 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3967 		/*
3968 		 * If there is no current entry in the nat table for this IP#,
3969 		 * create one for it (if there is a matching rule).
3970 		 */
3971 maskloop:
3972 		iph = in.s_addr & htonl(msk);
3973 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
3974 		for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) {
3975 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
3976 				continue;
3977 			if (np->in_v != fin->fin_v)
3978 				continue;
3979 			if (np->in_p && (np->in_p != fin->fin_p))
3980 				continue;
3981 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3982 				continue;
3983 			if (np->in_flags & IPN_FILTER) {
3984 				if (!nat_match(fin, np))
3985 					continue;
3986 			} else {
3987 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
3988 					continue;
3989 				if (np->in_pmin &&
3990 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
3991 				     (ntohs(dport) < ntohs(np->in_pmin))))
3992 					continue;
3993 			}
3994 
3995 			if (*np->in_plabel != '\0') {
3996 				if (!appr_ok(fin, tcp, np)) {
3997 					continue;
3998 				}
3999 			}
4000 
4001 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4002 			if (nat != NULL) {
4003 				np->in_hits++;
4004 				break;
4005 			} else
4006 				natfailed = -1;
4007 		}
4008 
4009 		if ((np == NULL) && (rmsk != 0)) {
4010 			while (rmsk) {
4011 				msk <<= 1;
4012 				if (rmsk & 0x80000000)
4013 					break;
4014 				rmsk <<= 1;
4015 			}
4016 			if (rmsk != 0) {
4017 				rmsk <<= 1;
4018 				goto maskloop;
4019 			}
4020 		}
4021 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4022 	}
4023 	if (nat != NULL) {
4024 		rval = fr_natin(fin, nat, natadd, nflags);
4025 		if (rval == 1) {
4026 			MUTEX_ENTER(&nat->nat_lock);
4027 			nat->nat_ref++;
4028 			MUTEX_EXIT(&nat->nat_lock);
4029 			nat->nat_touched = ifs->ifs_fr_ticks;
4030 			fin->fin_nat = nat;
4031 			fin->fin_state = nat->nat_state;
4032 		}
4033 	} else
4034 		rval = natfailed;
4035 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4036 
4037 	if (rval == -1) {
4038 		if (passp != NULL)
4039 			*passp = FR_BLOCK;
4040 		fin->fin_flx |= FI_BADNAT;
4041 	}
4042 	return rval;
4043 }
4044 
4045 
4046 /* ------------------------------------------------------------------------ */
4047 /* Function:    fr_natin                                                    */
4048 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4049 /*                     1 == packet was successfully translated.             */
4050 /* Parameters:  fin(I)    - pointer to packet information                   */
4051 /*              nat(I)    - pointer to NAT structure                        */
4052 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4053 /*              nflags(I) - NAT flags set for this packet                   */
4054 /* Locks Held:  ipf_nat (READ)                                              */
4055 /*                                                                          */
4056 /* Translate a packet coming "in" on an interface.                          */
4057 /* ------------------------------------------------------------------------ */
4058 int fr_natin(fin, nat, natadd, nflags)
4059 fr_info_t *fin;
4060 nat_t *nat;
4061 int natadd;
4062 u_32_t nflags;
4063 {
4064 	icmphdr_t *icmp;
4065 	u_short *csump, *csump1;
4066 	u_32_t sumd;
4067 	tcphdr_t *tcp;
4068 	ipnat_t *np;
4069 	int i;
4070 	ipf_stack_t *ifs = fin->fin_ifs;
4071 
4072 #if SOLARIS && defined(_KERNEL)
4073 	net_data_t net_data_p;
4074 	if (fin->fin_v == 4)
4075 		net_data_p = ifs->ifs_ipf_ipv4;
4076 	else
4077 		net_data_p = ifs->ifs_ipf_ipv6;
4078 #endif
4079 
4080 	tcp = NULL;
4081 	csump = NULL;
4082 	np = nat->nat_ptr;
4083 	fin->fin_fr = nat->nat_fr;
4084 
4085 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4086 		(void) fr_nat_newfrag(fin, 0, nat);
4087 
4088 	if (np != NULL) {
4089 
4090 	/* ------------------------------------------------------------- */
4091 	/* A few quick notes:						 */
4092 	/*	Following are test conditions prior to calling the 	 */
4093 	/*	appr_check routine.					 */
4094 	/*								 */
4095 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4096 	/*	with a map rule, we attempt to match the packet's	 */
4097 	/*	source port against in_dport, otherwise	we'd compare the */
4098 	/*	packet's destination.			 		 */
4099 	/* ------------------------------------------------------------- */
4100 		if (np->in_apr != NULL) {
4101 			i = appr_check(fin, nat);
4102 			if (i == -1) {
4103 				return -1;
4104 			}
4105 		}
4106 	}
4107 
4108 #ifdef	IPFILTER_SYNC
4109 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4110 #endif
4111 
4112 	MUTEX_ENTER(&nat->nat_lock);
4113 	nat->nat_bytes[0] += fin->fin_plen;
4114 	nat->nat_pkts[0]++;
4115 	MUTEX_EXIT(&nat->nat_lock);
4116 
4117 	fin->fin_ip->ip_dst = nat->nat_inip;
4118 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4119 	if (nflags & IPN_TCPUDP)
4120 		tcp = fin->fin_dp;
4121 
4122 	/*
4123 	 * Fix up checksums, not by recalculating them, but
4124 	 * simply computing adjustments.
4125 	 * Why only do this for some platforms on inbound packets ?
4126 	 * Because for those that it is done, IP processing is yet to happen
4127 	 * and so the IPv4 header checksum has not yet been evaluated.
4128 	 * Perhaps it should always be done for the benefit of things like
4129 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4130 	 * header checksum offloading, perhaps it is a moot point.
4131 	 */
4132 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4133      defined(__osf__) || defined(linux)
4134 	if (nat->nat_dir == NAT_OUTBOUND)
4135 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4136 	else
4137 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4138 #endif
4139 
4140 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4141 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4142 			tcp->th_dport = nat->nat_inport;
4143 			fin->fin_data[1] = ntohs(nat->nat_inport);
4144 		}
4145 
4146 
4147 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4148 			icmp = fin->fin_dp;
4149 
4150 			icmp->icmp_id = nat->nat_inport;
4151 		}
4152 
4153 		csump = nat_proto(fin, nat, nflags);
4154 	}
4155 
4156 	nat_update(fin, nat, np);
4157 
4158 #if SOLARIS && defined(_KERNEL)
4159 	if (nflags & IPN_TCPUDP &&
4160 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4161 		sumd = nat->nat_sumd[1];
4162 		csump1 = &(fin->fin_m->b_datap->db_struioun.cksum.cksum_val.u16);
4163 		if (csump1 != NULL) {
4164 			if (nat->nat_dir == NAT_OUTBOUND)
4165 				fix_incksum(csump1, sumd);
4166 			else
4167 				fix_outcksum(csump1, sumd);
4168 		}
4169 	} else
4170 #endif
4171 		sumd = nat->nat_sumd[0];
4172 
4173 	/*
4174 	 * Inbound packets always need to have their address adjusted in case
4175 	 * code following this validates it.
4176 	 */
4177 	if (csump != NULL) {
4178 		if (nat->nat_dir == NAT_OUTBOUND)
4179 			fix_incksum(csump, sumd);
4180 		else
4181 			fix_outcksum(csump, sumd);
4182 	}
4183 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]);
4184 	fin->fin_flx |= FI_NATED;
4185 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4186 		fin->fin_nattag = &np->in_tag;
4187 	return 1;
4188 }
4189 
4190 
4191 /* ------------------------------------------------------------------------ */
4192 /* Function:    nat_proto                                                   */
4193 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4194 /*                         NULL if the transport protocol is not recognised */
4195 /*                         as needing a checksum update.                    */
4196 /* Parameters:  fin(I)    - pointer to packet information                   */
4197 /*              nat(I)    - pointer to NAT structure                        */
4198 /*              nflags(I) - NAT flags set for this packet                   */
4199 /*                                                                          */
4200 /* Return the pointer to the checksum field for each protocol so understood.*/
4201 /* If support for making other changes to a protocol header is required,    */
4202 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4203 /* TCP down to a specific value, then do it from here.                      */
4204 /* ------------------------------------------------------------------------ */
4205 u_short *nat_proto(fin, nat, nflags)
4206 fr_info_t *fin;
4207 nat_t *nat;
4208 u_int nflags;
4209 {
4210 	icmphdr_t *icmp;
4211 	u_short *csump;
4212 	tcphdr_t *tcp;
4213 	udphdr_t *udp;
4214 
4215 	csump = NULL;
4216 	if (fin->fin_out == 0) {
4217 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4218 	} else {
4219 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4220 	}
4221 
4222 	switch (fin->fin_p)
4223 	{
4224 	case IPPROTO_TCP :
4225 		tcp = fin->fin_dp;
4226 
4227 		csump = &tcp->th_sum;
4228 
4229 		/*
4230 		 * Do a MSS CLAMPING on a SYN packet,
4231 		 * only deal IPv4 for now.
4232 		 */
4233 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4234 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4235 
4236 		break;
4237 
4238 	case IPPROTO_UDP :
4239 		udp = fin->fin_dp;
4240 
4241 		if (udp->uh_sum)
4242 			csump = &udp->uh_sum;
4243 		break;
4244 
4245 	case IPPROTO_ICMP :
4246 		icmp = fin->fin_dp;
4247 
4248 		if ((nflags & IPN_ICMPQUERY) != 0) {
4249 			if (icmp->icmp_cksum != 0)
4250 				csump = &icmp->icmp_cksum;
4251 		}
4252 		break;
4253 	}
4254 	return csump;
4255 }
4256 
4257 
4258 /* ------------------------------------------------------------------------ */
4259 /* Function:    fr_natunload                                                */
4260 /* Returns:     Nil                                                         */
4261 /* Parameters:  Nil                                                         */
4262 /*                                                                          */
4263 /* Free all memory used by NAT structures allocated at runtime.             */
4264 /* ------------------------------------------------------------------------ */
4265 void fr_natunload(ifs)
4266 ipf_stack_t *ifs;
4267 {
4268 	ipftq_t *ifq, *ifqnext;
4269 
4270 	(void) nat_clearlist(ifs);
4271 	(void) nat_flushtable(ifs);
4272 
4273 	/*
4274 	 * Proxy timeout queues are not cleaned here because although they
4275 	 * exist on the NAT list, appr_unload is called after fr_natunload
4276 	 * and the proxies actually are responsible for them being created.
4277 	 * Should the proxy timeouts have their own list?  There's no real
4278 	 * justification as this is the only complication.
4279 	 */
4280 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4281 		ifqnext = ifq->ifq_next;
4282 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4283 		    (fr_deletetimeoutqueue(ifq) == 0))
4284 			fr_freetimeoutqueue(ifq, ifs);
4285 	}
4286 
4287 	if (ifs->ifs_nat_table[0] != NULL) {
4288 		KFREES(ifs->ifs_nat_table[0],
4289 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4290 		ifs->ifs_nat_table[0] = NULL;
4291 	}
4292 	if (ifs->ifs_nat_table[1] != NULL) {
4293 		KFREES(ifs->ifs_nat_table[1],
4294 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4295 		ifs->ifs_nat_table[1] = NULL;
4296 	}
4297 	if (ifs->ifs_nat_rules != NULL) {
4298 		KFREES(ifs->ifs_nat_rules,
4299 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4300 		ifs->ifs_nat_rules = NULL;
4301 	}
4302 	if (ifs->ifs_rdr_rules != NULL) {
4303 		KFREES(ifs->ifs_rdr_rules,
4304 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4305 		ifs->ifs_rdr_rules = NULL;
4306 	}
4307 	if (ifs->ifs_maptable != NULL) {
4308 		KFREES(ifs->ifs_maptable,
4309 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4310 		ifs->ifs_maptable = NULL;
4311 	}
4312 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4313 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4314 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4315 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4316 	}
4317 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4318 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4319 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4320 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4321 	}
4322 
4323 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4324 		ifs->ifs_fr_nat_maxbucket = 0;
4325 
4326 	if (ifs->ifs_fr_nat_init == 1) {
4327 		ifs->ifs_fr_nat_init = 0;
4328 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4329 
4330 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4331 		RW_DESTROY(&ifs->ifs_ipf_nat);
4332 
4333 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4334 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4335 
4336 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4337 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4338 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4339 	}
4340 }
4341 
4342 
4343 /* ------------------------------------------------------------------------ */
4344 /* Function:    fr_natexpire                                                */
4345 /* Returns:     Nil                                                         */
4346 /* Parameters:  Nil                                                         */
4347 /*                                                                          */
4348 /* Check all of the timeout queues for entries at the top which need to be  */
4349 /* expired.                                                                 */
4350 /* ------------------------------------------------------------------------ */
4351 void fr_natexpire(ifs)
4352 ipf_stack_t *ifs;
4353 {
4354 	ipftq_t *ifq, *ifqnext;
4355 	ipftqent_t *tqe, *tqn;
4356 	int i;
4357 	SPL_INT(s);
4358 
4359 	SPL_NET(s);
4360 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4361 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4362 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4363 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4364 				break;
4365 			tqn = tqe->tqe_next;
4366 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4367 		}
4368 	}
4369 
4370 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4371 		ifqnext = ifq->ifq_next;
4372 
4373 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4374 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4375 				break;
4376 			tqn = tqe->tqe_next;
4377 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4378 		}
4379 	}
4380 
4381 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4382 		ifqnext = ifq->ifq_next;
4383 
4384 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4385 		    (ifq->ifq_ref == 0)) {
4386 			fr_freetimeoutqueue(ifq, ifs);
4387 		}
4388 	}
4389 
4390 	if (ifs->ifs_nat_doflush != 0) {
4391 		(void) nat_extraflush(2, ifs);
4392 		ifs->ifs_nat_doflush = 0;
4393 	}
4394 
4395 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4396 	SPL_X(s);
4397 }
4398 
4399 
4400 /* ------------------------------------------------------------------------ */
4401 /* Function:    fr_nataddrsync                                              */
4402 /* Returns:     Nil                                                         */
4403 /* Parameters:  ifp(I) -  pointer to network interface                      */
4404 /*              addr(I) - pointer to new network address                    */
4405 /*                                                                          */
4406 /* Walk through all of the currently active NAT sessions, looking for those */
4407 /* which need to have their translated address updated (where the interface */
4408 /* matches the one passed in) and change it, recalculating the checksum sum */
4409 /* difference too.                                                          */
4410 /* ------------------------------------------------------------------------ */
4411 void fr_nataddrsync(ifp, addr, ifs)
4412 void *ifp;
4413 struct in_addr *addr;
4414 ipf_stack_t *ifs;
4415 {
4416 	u_32_t sum1, sum2, sumd;
4417 	nat_t *nat;
4418 	ipnat_t *np;
4419 	SPL_INT(s);
4420 
4421 	if (ifs->ifs_fr_running <= 0)
4422 		return;
4423 
4424 	SPL_NET(s);
4425 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4426 
4427 	if (ifs->ifs_fr_running <= 0) {
4428 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4429 		return;
4430 	}
4431 
4432 	/*
4433 	 * Change IP addresses for NAT sessions for any protocol except TCP
4434 	 * since it will break the TCP connection anyway.  The only rules
4435 	 * which will get changed are those which are "map ... -> 0/32",
4436 	 * where the rule specifies the address is taken from the interface.
4437 	 */
4438 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4439 		if (addr != NULL) {
4440 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4441 			    ((nat->nat_flags & IPN_TCP) != 0))
4442 				continue;
4443 			if (((np = nat->nat_ptr) == NULL) ||
4444 			    (np->in_nip || (np->in_outmsk != 0xffffffff)))
4445 				continue;
4446 
4447 			/*
4448 			 * Change the map-to address to be the same as the
4449 			 * new one.
4450 			 */
4451 			sum1 = nat->nat_outip.s_addr;
4452 			nat->nat_outip = *addr;
4453 			sum2 = nat->nat_outip.s_addr;
4454 
4455 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4456 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
4457 		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
4458 			struct in_addr in;
4459 
4460 			/*
4461 			 * Change the map-to address to be the same as the
4462 			 * new one.
4463 			 */
4464 			sum1 = nat->nat_outip.s_addr;
4465 			if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4466 				       &in, NULL, ifs) != -1)
4467 				nat->nat_outip = in;
4468 			sum2 = nat->nat_outip.s_addr;
4469 		} else {
4470 			continue;
4471 		}
4472 
4473 		if (sum1 == sum2)
4474 			continue;
4475 		/*
4476 		 * Readjust the checksum adjustment to take into
4477 		 * account the new IP#.
4478 		 */
4479 		CALC_SUMD(sum1, sum2, sumd);
4480 		/* XXX - dont change for TCP when solaris does
4481 		 * hardware checksumming.
4482 		 */
4483 		sumd += nat->nat_sumd[0];
4484 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4485 		nat->nat_sumd[1] = nat->nat_sumd[0];
4486 	}
4487 
4488 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4489 	SPL_X(s);
4490 }
4491 
4492 
4493 /* ------------------------------------------------------------------------ */
4494 /* Function:    fr_natifpsync                                               */
4495 /* Returns:     Nil                                                         */
4496 /* Parameters:  action(I) - how we are syncing                              */
4497 /*              ifp(I)    - pointer to network interface                    */
4498 /*              name(I)   - name of interface to sync to                    */
4499 /*                                                                          */
4500 /* This function is used to resync the mapping of interface names and their */
4501 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4502 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4503 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4504 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4505 /* there is no longer any interface associated with it.                     */
4506 /* ------------------------------------------------------------------------ */
4507 void fr_natifpsync(action, ifp, name, ifs)
4508 int action;
4509 void *ifp;
4510 char *name;
4511 ipf_stack_t *ifs;
4512 {
4513 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4514 	int s;
4515 #endif
4516 	nat_t *nat;
4517 	ipnat_t *n;
4518 
4519 	if (ifs->ifs_fr_running <= 0)
4520 		return;
4521 
4522 	SPL_NET(s);
4523 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4524 
4525 	if (ifs->ifs_fr_running <= 0) {
4526 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4527 		return;
4528 	}
4529 
4530 	switch (action)
4531 	{
4532 	case IPFSYNC_RESYNC :
4533 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4534 			if ((ifp == nat->nat_ifps[0]) ||
4535 			    (nat->nat_ifps[0] == (void *)-1)) {
4536 				nat->nat_ifps[0] =
4537 				    fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
4538 			}
4539 
4540 			if ((ifp == nat->nat_ifps[1]) ||
4541 			    (nat->nat_ifps[1] == (void *)-1)) {
4542 				nat->nat_ifps[1] =
4543 				    fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
4544 			}
4545 		}
4546 
4547 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4548 			if (n->in_ifps[0] == ifp ||
4549 			    n->in_ifps[0] == (void *)-1) {
4550 				n->in_ifps[0] =
4551 				    fr_resolvenic(n->in_ifnames[0], 4, ifs);
4552 			}
4553 			if (n->in_ifps[1] == ifp ||
4554 			    n->in_ifps[1] == (void *)-1) {
4555 				n->in_ifps[1] =
4556 				    fr_resolvenic(n->in_ifnames[1], 4, ifs);
4557 			}
4558 		}
4559 		break;
4560 	case IPFSYNC_NEWIFP :
4561 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4562 			if (!strncmp(name, nat->nat_ifnames[0],
4563 				     sizeof(nat->nat_ifnames[0])))
4564 				nat->nat_ifps[0] = ifp;
4565 			if (!strncmp(name, nat->nat_ifnames[1],
4566 				     sizeof(nat->nat_ifnames[1])))
4567 				nat->nat_ifps[1] = ifp;
4568 		}
4569 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4570 			if (!strncmp(name, n->in_ifnames[0],
4571 				     sizeof(n->in_ifnames[0])))
4572 				n->in_ifps[0] = ifp;
4573 			if (!strncmp(name, n->in_ifnames[1],
4574 				     sizeof(n->in_ifnames[1])))
4575 				n->in_ifps[1] = ifp;
4576 		}
4577 		break;
4578 	case IPFSYNC_OLDIFP :
4579 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4580 			if (ifp == nat->nat_ifps[0])
4581 				nat->nat_ifps[0] = (void *)-1;
4582 			if (ifp == nat->nat_ifps[1])
4583 				nat->nat_ifps[1] = (void *)-1;
4584 		}
4585 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4586 			if (n->in_ifps[0] == ifp)
4587 				n->in_ifps[0] = (void *)-1;
4588 			if (n->in_ifps[1] == ifp)
4589 				n->in_ifps[1] = (void *)-1;
4590 		}
4591 		break;
4592 	}
4593 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4594 	SPL_X(s);
4595 }
4596 
4597 
4598 /* ------------------------------------------------------------------------ */
4599 /* Function:    nat_icmpquerytype4                                          */
4600 /* Returns:     int - 1 == success, 0 == failure                            */
4601 /* Parameters:  icmptype(I) - ICMP type number                              */
4602 /*                                                                          */
4603 /* Tests to see if the ICMP type number passed is a query/response type or  */
4604 /* not.                                                                     */
4605 /* ------------------------------------------------------------------------ */
4606 static INLINE int nat_icmpquerytype4(icmptype)
4607 int icmptype;
4608 {
4609 
4610 	/*
4611 	 * For the ICMP query NAT code, it is essential that both the query
4612 	 * and the reply match on the NAT rule. Because the NAT structure
4613 	 * does not keep track of the icmptype, and a single NAT structure
4614 	 * is used for all icmp types with the same src, dest and id, we
4615 	 * simply define the replies as queries as well. The funny thing is,
4616 	 * altough it seems silly to call a reply a query, this is exactly
4617 	 * as it is defined in the IPv4 specification
4618 	 */
4619 
4620 	switch (icmptype)
4621 	{
4622 
4623 	case ICMP_ECHOREPLY:
4624 	case ICMP_ECHO:
4625 	/* route aedvertisement/solliciation is currently unsupported: */
4626 	/* it would require rewriting the ICMP data section            */
4627 	case ICMP_TSTAMP:
4628 	case ICMP_TSTAMPREPLY:
4629 	case ICMP_IREQ:
4630 	case ICMP_IREQREPLY:
4631 	case ICMP_MASKREQ:
4632 	case ICMP_MASKREPLY:
4633 		return 1;
4634 	default:
4635 		return 0;
4636 	}
4637 }
4638 
4639 
4640 /* ------------------------------------------------------------------------ */
4641 /* Function:    nat_log                                                     */
4642 /* Returns:     Nil                                                         */
4643 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4644 /*              type(I) - type of log entry to create                       */
4645 /*                                                                          */
4646 /* Creates a NAT log entry.                                                 */
4647 /* ------------------------------------------------------------------------ */
4648 void nat_log(nat, type, ifs)
4649 struct nat *nat;
4650 u_int type;
4651 ipf_stack_t *ifs;
4652 {
4653 #ifdef	IPFILTER_LOG
4654 # ifndef LARGE_NAT
4655 	struct ipnat *np;
4656 	int rulen;
4657 # endif
4658 	struct natlog natl;
4659 	void *items[1];
4660 	size_t sizes[1];
4661 	int types[1];
4662 
4663 	natl.nl_inip = nat->nat_inip;
4664 	natl.nl_outip = nat->nat_outip;
4665 	natl.nl_origip = nat->nat_oip;
4666 	natl.nl_bytes[0] = nat->nat_bytes[0];
4667 	natl.nl_bytes[1] = nat->nat_bytes[1];
4668 	natl.nl_pkts[0] = nat->nat_pkts[0];
4669 	natl.nl_pkts[1] = nat->nat_pkts[1];
4670 	natl.nl_origport = nat->nat_oport;
4671 	natl.nl_inport = nat->nat_inport;
4672 	natl.nl_outport = nat->nat_outport;
4673 	natl.nl_p = nat->nat_p;
4674 	natl.nl_type = type;
4675 	natl.nl_rule = -1;
4676 # ifndef LARGE_NAT
4677 	if (nat->nat_ptr != NULL) {
4678 		for (rulen = 0, np = ifs->ifs_nat_list; np;
4679 		     np = np->in_next, rulen++)
4680 			if (np == nat->nat_ptr) {
4681 				natl.nl_rule = rulen;
4682 				break;
4683 			}
4684 	}
4685 # endif
4686 	items[0] = &natl;
4687 	sizes[0] = sizeof(natl);
4688 	types[0] = 0;
4689 
4690 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
4691 #endif
4692 }
4693 
4694 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - ipf stack instance                                 */
/*                                                                          */
/* OpenBSD compatibility hook: forwards to frsync() so that interface       */
/* references held within IPFilter get updated.                             */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	frsync(ifp, ifs);
}
#endif
4712 
4713 
4714 /* ------------------------------------------------------------------------ */
4715 /* Function:    fr_ipnatderef                                               */
4716 /* Returns:     Nil                                                         */
4717 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4718 /* Write Locks: ipf_nat                                                     */
4719 /*                                                                          */
4720 /* ------------------------------------------------------------------------ */
4721 void fr_ipnatderef(inp, ifs)
4722 ipnat_t **inp;
4723 ipf_stack_t *ifs;
4724 {
4725 	ipnat_t *in;
4726 
4727 	in = *inp;
4728 	*inp = NULL;
4729 	in->in_space++;
4730 	in->in_use--;
4731 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4732 		if (in->in_apr)
4733 			appr_free(in->in_apr);
4734 		KFREE(in);
4735 		ifs->ifs_nat_stats.ns_rules--;
4736 #ifdef notdef
4737 #if SOLARIS
4738 		if (ifs->ifs_nat_stats.ns_rules == 0)
4739 			ifs->ifs_pfil_delayed_copy = 1;
4740 #endif
4741 #endif
4742 	}
4743 }
4744 
4745 
4746 /* ------------------------------------------------------------------------ */
4747 /* Function:    fr_natderef                                                 */
4748 /* Returns:     Nil                                                         */
4749 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4750 /*                                                                          */
4751 /* Decrement the reference counter for this NAT table entry and free it if  */
4752 /* there are no more things using it.                                       */
4753 /*                                                                          */
4754 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4755 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4756 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4757 /* because nat_delete() will do that and send nat_ref to -1.                */
4758 /*                                                                          */
4759 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4760 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4761 /* ------------------------------------------------------------------------ */
4762 void fr_natderef(natp, ifs)
4763 nat_t **natp;
4764 ipf_stack_t *ifs;
4765 {
4766 	nat_t *nat;
4767 
4768 	nat = *natp;
4769 	*natp = NULL;
4770 
4771 	MUTEX_ENTER(&nat->nat_lock);
4772 	if (nat->nat_ref > 1) {
4773 		nat->nat_ref--;
4774 		MUTEX_EXIT(&nat->nat_lock);
4775 		return;
4776 	}
4777 	MUTEX_EXIT(&nat->nat_lock);
4778 
4779 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4780 	nat_delete(nat, NL_EXPIRE, ifs);
4781 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4782 }
4783 
4784 
4785 /* ------------------------------------------------------------------------ */
4786 /* Function:    fr_natclone                                                 */
4787 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4788 /*                           else pointer to new state structure            */
4789 /* Parameters:  fin(I) - pointer to packet information                      */
4790 /*              is(I)  - pointer to master state structure                  */
4791 /* Write Lock:  ipf_nat                                                     */
4792 /*                                                                          */
4793 /* Create a "duplcate" state table entry from the master.                   */
4794 /* ------------------------------------------------------------------------ */
4795 static nat_t *fr_natclone(fin, nat)
4796 fr_info_t *fin;
4797 nat_t *nat;
4798 {
4799 	frentry_t *fr;
4800 	nat_t *clone;
4801 	ipnat_t *np;
4802 	ipf_stack_t *ifs = fin->fin_ifs;
4803 
4804 	KMALLOC(clone, nat_t *);
4805 	if (clone == NULL)
4806 		return NULL;
4807 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4808 
4809 	MUTEX_NUKE(&clone->nat_lock);
4810 
4811 	clone->nat_aps = NULL;
4812 	/*
4813 	 * Initialize all these so that nat_delete() doesn't cause a crash.
4814 	 */
4815 	clone->nat_tqe.tqe_pnext = NULL;
4816 	clone->nat_tqe.tqe_next = NULL;
4817 	clone->nat_tqe.tqe_ifq = NULL;
4818 	clone->nat_tqe.tqe_parent = clone;
4819 
4820 	clone->nat_flags &= ~SI_CLONE;
4821 	clone->nat_flags |= SI_CLONED;
4822 
4823 	if (clone->nat_hm)
4824 		clone->nat_hm->hm_ref++;
4825 
4826 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
4827 		KFREE(clone);
4828 		return NULL;
4829 	}
4830 	np = clone->nat_ptr;
4831 	if (np != NULL) {
4832 		if (ifs->ifs_nat_logging)
4833 			nat_log(clone, (u_int)np->in_redir, ifs);
4834 		np->in_use++;
4835 	}
4836 	fr = clone->nat_fr;
4837 	if (fr != NULL) {
4838 		MUTEX_ENTER(&fr->fr_lock);
4839 		fr->fr_ref++;
4840 		MUTEX_EXIT(&fr->fr_lock);
4841 	}
4842 
4843 	/*
4844 	 * Because the clone is created outside the normal loop of things and
4845 	 * TCP has special needs in terms of state, initialise the timeout
4846 	 * state of the new NAT from here.
4847 	 */
4848 	if (clone->nat_p == IPPROTO_TCP) {
4849 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
4850 				  clone->nat_flags);
4851 	}
4852 #ifdef	IPFILTER_SYNC
4853 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4854 #endif
4855 	if (ifs->ifs_nat_logging)
4856 		nat_log(clone, NL_CLONE, ifs);
4857 	return clone;
4858 }
4859 
4860 
4861 /* ------------------------------------------------------------------------ */
4862 /* Function:   nat_wildok                                                   */
4863 /* Returns:    int - 1 == packet's ports match wildcards                    */
4864 /*                   0 == packet's ports don't match wildcards              */
4865 /* Parameters: nat(I)   - NAT entry                                         */
4866 /*             sport(I) - source port                                       */
4867 /*             dport(I) - destination port                                  */
4868 /*             flags(I) - wildcard flags                                    */
4869 /*             dir(I)   - packet direction                                  */
4870 /*                                                                          */
4871 /* Use NAT entry and packet direction to determine which combination of     */
4872 /* wildcard flags should be used.                                           */
4873 /* ------------------------------------------------------------------------ */
4874 static INLINE int nat_wildok(nat, sport, dport, flags, dir)
4875 nat_t *nat;
4876 int sport;
4877 int dport;
4878 int flags;
4879 int dir;
4880 {
4881 	/*
4882 	 * When called by       dir is set to
4883 	 * nat_inlookup         NAT_INBOUND (0)
4884 	 * nat_outlookup        NAT_OUTBOUND (1)
4885 	 *
4886 	 * We simply combine the packet's direction in dir with the original
4887 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4888 	 * which combination of wildcard flags to allow.
4889 	 */
4890 
4891 	switch ((dir << 1) | nat->nat_dir)
4892 	{
4893 	case 3: /* outbound packet / outbound entry */
4894 		if (((nat->nat_inport == sport) ||
4895 		    (flags & SI_W_SPORT)) &&
4896 		    ((nat->nat_oport == dport) ||
4897 		    (flags & SI_W_DPORT)))
4898 			return 1;
4899 		break;
4900 	case 2: /* outbound packet / inbound entry */
4901 		if (((nat->nat_outport == sport) ||
4902 		    (flags & SI_W_DPORT)) &&
4903 		    ((nat->nat_oport == dport) ||
4904 		    (flags & SI_W_SPORT)))
4905 			return 1;
4906 		break;
4907 	case 1: /* inbound packet / outbound entry */
4908 		if (((nat->nat_oport == sport) ||
4909 		    (flags & SI_W_DPORT)) &&
4910 		    ((nat->nat_outport == dport) ||
4911 		    (flags & SI_W_SPORT)))
4912 			return 1;
4913 		break;
4914 	case 0: /* inbound packet / inbound entry */
4915 		if (((nat->nat_oport == sport) ||
4916 		    (flags & SI_W_SPORT)) &&
4917 		    ((nat->nat_outport == dport) ||
4918 		    (flags & SI_W_DPORT)))
4919 			return 1;
4920 		break;
4921 	default:
4922 		break;
4923 	}
4924 
4925 	return(0);
4926 }
4927 
4928 
4929 /* ------------------------------------------------------------------------ */
4930 /* Function:    nat_mssclamp                                                */
4931 /* Returns:     Nil                                                         */
4932 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4933 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4934 /*              csump(I)  - pointer to TCP checksum                         */
4935 /*                                                                          */
4936 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4937 /* then the TCP header checksum will be updated to reflect the change in    */
4938 /* the MSS.                                                                 */
4939 /* ------------------------------------------------------------------------ */
4940 static void nat_mssclamp(tcp, maxmss, csump)
4941 tcphdr_t *tcp;
4942 u_32_t maxmss;
4943 u_short *csump;
4944 {
4945 	u_char *cp, *ep, opt;
4946 	int hlen, advance;
4947 	u_32_t mss, sumd;
4948 
4949 	hlen = TCP_OFF(tcp) << 2;
4950 	if (hlen > sizeof(*tcp)) {
4951 		cp = (u_char *)tcp + sizeof(*tcp);
4952 		ep = (u_char *)tcp + hlen;
4953 
4954 		while (cp < ep) {
4955 			opt = cp[0];
4956 			if (opt == TCPOPT_EOL)
4957 				break;
4958 			else if (opt == TCPOPT_NOP) {
4959 				cp++;
4960 				continue;
4961 			}
4962 
4963 			if (cp + 1 >= ep)
4964 				break;
4965 			advance = cp[1];
4966 			if ((cp + advance > ep) || (advance <= 0))
4967 				break;
4968 			switch (opt)
4969 			{
4970 			case TCPOPT_MAXSEG:
4971 				if (advance != 4)
4972 					break;
4973 				mss = cp[2] * 256 + cp[3];
4974 				if (mss > maxmss) {
4975 					cp[2] = maxmss / 256;
4976 					cp[3] = maxmss & 0xff;
4977 					CALC_SUMD(mss, maxmss, sumd);
4978 					fix_outcksum(csump, sumd);
4979 				}
4980 				break;
4981 			default:
4982 				/* ignore unknown options */
4983 				break;
4984 			}
4985 
4986 			cp += advance;
4987 		}
4988 	}
4989 }
4990 
4991 
4992 /* ------------------------------------------------------------------------ */
4993 /* Function:    fr_setnatqueue                                              */
4994 /* Returns:     Nil                                                         */
4995 /* Parameters:  nat(I)- pointer to NAT structure                            */
4996 /*              rev(I) - forward(0) or reverse(1) direction                 */
4997 /* Locks:       ipf_nat (read or write)                                     */
4998 /*                                                                          */
4999 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5000 /* determining which queue it should be placed on.                          */
5001 /* ------------------------------------------------------------------------ */
5002 void fr_setnatqueue(nat, rev, ifs)
5003 nat_t *nat;
5004 int rev;
5005 ipf_stack_t *ifs;
5006 {
5007 	ipftq_t *oifq, *nifq;
5008 
5009 	if (nat->nat_ptr != NULL)
5010 		nifq = nat->nat_ptr->in_tqehead[rev];
5011 	else
5012 		nifq = NULL;
5013 
5014 	if (nifq == NULL) {
5015 		switch (nat->nat_p)
5016 		{
5017 		case IPPROTO_UDP :
5018 			nifq = &ifs->ifs_nat_udptq;
5019 			break;
5020 		case IPPROTO_ICMP :
5021 			nifq = &ifs->ifs_nat_icmptq;
5022 			break;
5023 		case IPPROTO_TCP :
5024 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5025 			break;
5026 		default :
5027 			nifq = &ifs->ifs_nat_iptq;
5028 			break;
5029 		}
5030 	}
5031 
5032 	oifq = nat->nat_tqe.tqe_ifq;
5033 	/*
5034 	 * If it's currently on a timeout queue, move it from one queue to
5035 	 * another, else put it on the end of the newly determined queue.
5036 	 */
5037 	if (oifq != NULL)
5038 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5039 	else
5040 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5041 	return;
5042 }
5043 
5044 /* Function:    nat_getnext                                                 */
5045 /* Returns:     int - 0 == ok, else error                                   */
5046 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5047 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5048 /*                                                                          */
5049 /* Fetch the next nat/ipnat structure pointer from the linked list and      */
5050 /* copy it out to the storage space pointed to by itp_data.  The next item  */
5051 /* in the list to look at is put back in the ipftoken struture.             */
5052 /* If we call ipf_freetoken, the accompanying pointer is set to NULL because*/
5053 /* ipf_freetoken will call a deref function for us and we dont want to call */
5054 /* that twice (second time would be in the second switch statement below.   */
5055 /* ------------------------------------------------------------------------ */
5056 static int nat_getnext(t, itp, ifs)
5057 ipftoken_t *t;
5058 ipfgeniter_t *itp;
5059 ipf_stack_t *ifs;
5060 {
5061 	hostmap_t *hm, *nexthm = NULL, zerohm;
5062 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5063 	nat_t *nat, *nextnat = NULL, zeronat;
5064 	int error = 0;
5065 
5066 	READ_ENTER(&ifs->ifs_ipf_nat);
5067 	switch (itp->igi_type)
5068 	{
5069 	case IPFGENITER_HOSTMAP :
5070 		hm = t->ipt_data;
5071 		if (hm == NULL) {
5072 			nexthm = ifs->ifs_ipf_hm_maplist;
5073 		} else {
5074 			nexthm = hm->hm_hnext;
5075 		}
5076 		if (nexthm != NULL) {
5077 			if (nexthm->hm_hnext == NULL) {
5078 				t->ipt_alive = 0;
5079 				ipf_unlinktoken(t, ifs);
5080 				KFREE(t);
5081 			} else {
5082 				/*MUTEX_ENTER(&nexthm->hm_lock);*/
5083 				nexthm->hm_ref++;
5084 				/*MUTEX_EXIT(&nextipnat->hm_lock);*/
5085 			}
5086 
5087 		} else {
5088 			bzero(&zerohm, sizeof(zerohm));
5089 			nexthm = &zerohm;
5090 			ipf_freetoken(t, ifs);
5091 		}
5092 		break;
5093 
5094 	case IPFGENITER_IPNAT :
5095 		ipn = t->ipt_data;
5096 		if (ipn == NULL) {
5097 			nextipnat = ifs->ifs_nat_list;
5098 		} else {
5099 			nextipnat = ipn->in_next;
5100 		}
5101 		if (nextipnat != NULL) {
5102 			if (nextipnat->in_next == NULL) {
5103 				t->ipt_alive = 0;
5104 				ipf_unlinktoken(t, ifs);
5105 				KFREE(t);
5106 			} else {
5107 				/* MUTEX_ENTER(&nextipnat->in_lock); */
5108 				nextipnat->in_use++;
5109 				/* MUTEX_EXIT(&nextipnat->in_lock); */
5110 			}
5111 		} else {
5112 			bzero(&zeroipn, sizeof(zeroipn));
5113 			nextipnat = &zeroipn;
5114 			ipf_freetoken(t, ifs);
5115 		}
5116 		break;
5117 
5118 	case IPFGENITER_NAT :
5119 		nat = t->ipt_data;
5120 		if (nat == NULL) {
5121 			nextnat = ifs->ifs_nat_instances;
5122 		} else {
5123 			nextnat = nat->nat_next;
5124 		}
5125 		if (nextnat != NULL) {
5126 			if (nextnat->nat_next == NULL) {
5127 				t->ipt_alive = 0;
5128 				ipf_unlinktoken(t, ifs);
5129 				KFREE(t);
5130 			} else {
5131 				MUTEX_ENTER(&nextnat->nat_lock);
5132 				nextnat->nat_ref++;
5133 				MUTEX_EXIT(&nextnat->nat_lock);
5134 			}
5135 		} else {
5136 			bzero(&zeronat, sizeof(zeronat));
5137 			nextnat = &zeronat;
5138 			ipf_freetoken(t, ifs);
5139 		}
5140 		break;
5141 	}
5142 
5143 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5144 
5145 	switch (itp->igi_type)
5146 	{
5147 	case IPFGENITER_HOSTMAP :
5148 		if (hm != NULL) {
5149 			WRITE_ENTER(&ifs->ifs_ipf_nat);
5150 			fr_hostmapderef(&hm);
5151 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5152 		}
5153 		if (nexthm->hm_hnext != NULL)
5154 			t->ipt_data = nexthm;
5155 		error = COPYOUT(nexthm, itp->igi_data, sizeof(*nexthm));
5156 		if (error != 0)
5157 			error = EFAULT;
5158 		break;
5159 
5160 	case IPFGENITER_IPNAT :
5161 		if (ipn != NULL)
5162 			fr_ipnatderef(&ipn, ifs);
5163 		if (nextipnat->in_next != NULL)
5164 			t->ipt_data = nextipnat;
5165 		error = COPYOUT(nextipnat, itp->igi_data, sizeof(*nextipnat));
5166 		if (error != 0)
5167 			error = EFAULT;
5168 		break;
5169 
5170 	case IPFGENITER_NAT :
5171 		if (nat != NULL)
5172 			fr_natderef(&nat, ifs);
5173 		if (nextnat->nat_next != NULL)
5174 			t->ipt_data = nextnat;
5175 		error = COPYOUT(nextnat, itp->igi_data, sizeof(*nextnat));
5176 		if (error != 0)
5177 			error = EFAULT;
5178 		break;
5179 	}
5180 
5181 	return error;
5182 }
5183 
5184 
5185 /* ------------------------------------------------------------------------ */
5186 /* Function:    nat_iterator                                                */
5187 /* Returns:     int - 0 == ok, else error                                   */
5188 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5189 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5190 /*                                                                          */
5191 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5192 /* generic structure to iterate through a list.  There are three different  */
5193 /* linked lists of NAT related information to go through: NAT rules, active */
5194 /* NAT mappings and the NAT fragment cache.                                 */
5195 /* ------------------------------------------------------------------------ */
5196 static int nat_iterator(token, itp, ifs)
5197 ipftoken_t *token;
5198 ipfgeniter_t *itp;
5199 ipf_stack_t *ifs;
5200 {
5201 	int error;
5202 
5203 	if (itp->igi_data == NULL)
5204 		return EFAULT;
5205 
5206 	token->ipt_subtype = itp->igi_type;
5207 
5208 	switch (itp->igi_type)
5209 	{
5210 	case IPFGENITER_HOSTMAP :
5211 	case IPFGENITER_IPNAT :
5212 	case IPFGENITER_NAT :
5213 		error = nat_getnext(token, itp, ifs);
5214 		break;
5215 	case IPFGENITER_NATFRAG :
5216 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5217 				    &ifs->ifs_ipfr_nattail,
5218 				    &ifs->ifs_ipf_natfrag, ifs);
5219 		break;
5220 	default :
5221 		error = EINVAL;
5222 		break;
5223 	}
5224 
5225 	return error;
5226 }
5227 
5228 
5229 /* -------------------------------------------------------------------- */
5230 /* Function:	nat_earlydrop						*/
5231 /* Returns:	number of dropped/removed entries from the queue	*/
5232 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5233 /*		maxidle - entry must be idle this long to be dropped	*/
5234 /*		ifs - ipf stack instance				*/
5235 /*									*/
5236 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5237 /* form specified timeout queue, based on how long they've sat idle,	*/
5238 /* without waiting for it to happen on its own.				*/
5239 /* -------------------------------------------------------------------- */
5240 static int nat_earlydrop(ifq, maxidle, ifs)
5241 ipftq_t *ifq;
5242 int maxidle;
5243 ipf_stack_t *ifs;
5244 {
5245 	ipftqent_t *tqe, *tqn;
5246 	nat_t *nat;
5247 	unsigned int dropped;
5248 	int droptick;
5249 
5250 	if (ifq == NULL)
5251 		return (0);
5252 
5253 	dropped = 0;
5254 
5255 	/*
5256 	 * Determine the tick representing the idle time we're interested
5257 	 * in.  If an entry exists in the queue, and it was touched before
5258 	 * that tick, then it's been idle longer than maxidle ... remove it.
5259 	 */
5260 	droptick = ifs->ifs_fr_ticks - maxidle;
5261 	tqn = ifq->ifq_head;
5262 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5263 		tqn = tqe->tqe_next;
5264 		nat = tqe->tqe_parent;
5265 		nat_delete(nat, ISL_EXPIRE, ifs);
5266 		dropped++;
5267 	}
5268 	return (dropped);
5269 }
5270 
5271 
5272 /* --------------------------------------------------------------------- */
5273 /* Function:	nat_flushclosing					 */
5274 /* Returns:	int - number of NAT entries deleted			 */
5275 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5276 /*		ifs - ipf stack instance				 */
5277 /*									 */
5278 /* Remove nat table entries for TCP connections which are in the process */
5279 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5280 /* --------------------------------------------------------------------- */
5281 static int nat_flushclosing(stateval, ifs)
5282 int stateval;
5283 ipf_stack_t *ifs;
5284 {
5285 	ipftq_t *ifq, *ifqn;
5286 	ipftqent_t *tqe, *tqn;
5287 	nat_t *nat;
5288 	int dropped;
5289 
5290 	dropped = 0;
5291 
5292 	/*
5293 	 * Start by deleting any entries in specific timeout queues.
5294 	 */
5295 	ifqn = &ifs->ifs_nat_tqb[stateval];
5296 	while ((ifq = ifqn) != NULL) {
5297 		ifqn = ifq->ifq_next;
5298 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5299 	}
5300 
5301 	/*
5302 	 * Next, look through user defined queues for closing entries.
5303 	 */
5304 	ifqn = ifs->ifs_nat_utqe;
5305 	while ((ifq = ifqn) != NULL) {
5306 		ifqn = ifq->ifq_next;
5307 		tqn = ifq->ifq_head;
5308 		while ((tqe = tqn) != NULL) {
5309 			tqn = tqe->tqe_next;
5310 			nat = tqe->tqe_parent;
5311 			if (nat->nat_p != IPPROTO_TCP)
5312 				continue;
5313 			if ((nat->nat_tcpstate[0] >= stateval) &&
5314 			    (nat->nat_tcpstate[1] >= stateval)) {
5315 				nat_delete(nat, NL_EXPIRE, ifs);
5316 				dropped++;
5317 			}
5318 		}
5319 	}
5320 	return (dropped);
5321 }
5322 
5323 
5324 /* --------------------------------------------------------------------- */
5325 /* Function:	nat_extraflush						 */
5326 /* Returns:	int - number of NAT entries deleted			 */
5327 /* Parameters:	which(I) - how to flush the active NAT table		 */
5328 /*		ifs - ipf stack instance				 */
5329 /* Write Locks:	ipf_nat							 */
5330 /*									 */
5331 /* Flush nat tables.  Three actions currently defined:			 */
5332 /*									 */
5333 /* which == 0 :	Flush all nat table entries.				 */
5334 /*									 */
5335 /* which == 1 :	Flush entries with TCP connections which have started	 */
5336 /*		to close on both ends.					 */
5337 /*									 */
5338 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5339 /*		does not take us below specified threshold in the table, */
5340 /*		we want to flush entries with TCP connections which have */
5341 /*		been idle for a long time.  Start with connections idle	 */
5342 /*		over 12 hours,  and then work backwards in half hour	 */
5343 /*		increments to at most 30 minutes idle, and finally work	 */
5344 /*		back in 30 second increments to at most 30 seconds.	 */
5345 /* --------------------------------------------------------------------- */
5346 static int nat_extraflush(which, ifs)
5347 int which;
5348 ipf_stack_t *ifs;
5349 {
5350 	ipftq_t *ifq, *ifqn;
5351 	nat_t *nat, **natp;
5352 	int idletime, removed, idle_idx;
5353 	SPL_INT(s);
5354 
5355 	removed = 0;
5356 
5357 	SPL_NET(s);
5358 	switch (which)
5359 	{
5360 	case 0:
5361 		natp = &ifs->ifs_nat_instances;
5362 		while ((nat = *natp) != NULL) {
5363 			natp = &nat->nat_next;
5364 			nat_delete(nat, ISL_FLUSH, ifs);
5365 			removed++;
5366 		}
5367 		break;
5368 
5369 	case 1:
5370 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5371 		break;
5372 
5373 	case 2:
5374 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5375 
5376 		/*
5377 		 * Be sure we haven't done this in the last 10 seconds.
5378 		 */
5379 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5380 		    IPF_TTLVAL(10))
5381 			break;
5382 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5383 
5384 		/*
5385 		 * Determine initial threshold for minimum idle time based on
5386 		 * how long ipfilter has been running.  Ipfilter needs to have
5387 		 * been up as long as the smallest interval to continue on.
5388 		 *
5389 		 * Minimum idle times stored in idletime_tab and indexed by
5390 		 * idle_idx.  Start at upper end of array and work backwards.
5391 		 *
5392 		 * Once the index is found, set the initial idle time to the
5393 		 * first interval before the current ipfilter run time.
5394 		 */
5395 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5396 			break;  /* switch */
5397 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5398 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5399 			idletime = idletime_tab[idle_idx];
5400 		} else {
5401 			while ((idle_idx > 0) &&
5402 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5403 				idle_idx--;
5404 			idletime = (ifs->ifs_fr_ticks /
5405 				    idletime_tab[idle_idx]) *
5406 				    idletime_tab[idle_idx];
5407 		}
5408 
5409 		while ((idle_idx >= 0) &&
5410 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5411 			/*
5412 			 * Start with appropriate timeout queue.
5413 			 */
5414 			removed += nat_earlydrop(
5415 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5416 					idletime, ifs);
5417 
5418 			/*
5419 			 * Make sure we haven't already deleted enough
5420 			 * entries before checking the user defined queues.
5421 			 */
5422 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5423 			    ifs->ifs_nat_flush_lvl_lo)
5424 				break;
5425 
5426 			/*
5427 			 * Next, look through the user defined queues.
5428 			 */
5429 			ifqn = ifs->ifs_nat_utqe;
5430 			while ((ifq = ifqn) != NULL) {
5431 				ifqn = ifq->ifq_next;
5432 				removed += nat_earlydrop(ifq, idletime, ifs);
5433 			}
5434 
5435 			/*
5436 			 * Adjust the granularity of idle time.
5437 			 *
5438 			 * If we reach an interval boundary, we need to
5439 			 * either adjust the idle time accordingly or exit
5440 			 * the loop altogether (if this is very last check).
5441 			 */
5442 			idletime -= idletime_tab[idle_idx];
5443 			if (idletime < idletime_tab[idle_idx]) {
5444 				if (idle_idx != 0) {
5445 					idletime = idletime_tab[idle_idx] -
5446 					    idletime_tab[idle_idx - 1];
5447 					idle_idx--;
5448 				} else {
5449 					break;  /* while */
5450 				}
5451 			}
5452 		}
5453 		break;
5454 	default:
5455 		break;
5456 	}
5457 
5458 	SPL_X(s);
5459 	return (removed);
5460 }
5461