xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision 8899fcfa)
1 /*
2  * Copyright (C) 1995-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"$
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 # include "opt_ipfilter_log.h"
26 #endif
27 #if !defined(_KERNEL)
28 # include <stdio.h>
29 # include <string.h>
30 # include <stdlib.h>
31 # define _KERNEL
32 # ifdef __OpenBSD__
33 struct file;
34 # endif
35 # include <sys/uio.h>
36 # undef _KERNEL
37 #endif
38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
39 # include <sys/filio.h>
40 # include <sys/fcntl.h>
41 #else
42 # include <sys/ioctl.h>
43 #endif
44 #if !defined(AIX)
45 # include <sys/fcntl.h>
46 #endif
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 #  include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 #  include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66 #if __FreeBSD_version >= 300000
67 # include <sys/queue.h>
68 #endif
69 #include <net/if.h>
70 #if __FreeBSD_version >= 300000
71 # include <net/if_var.h>
72 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
73 #  include "opt_ipfilter.h"
74 # endif
75 #endif
76 #ifdef sun
77 # include <net/af.h>
78 #endif
79 #include <net/route.h>
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 
84 #ifdef RFC1825
85 # include <vpn/md5.h>
86 # include <vpn/ipsec.h>
87 extern struct ifnet vpnif;
88 #endif
89 
90 #if !defined(linux)
91 # include <netinet/ip_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_nat.h"
100 #include "netinet/ip_frag.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ip_proxy.h"
103 #include "netinet/ipf_stack.h"
104 #ifdef	IPFILTER_SYNC
105 #include "netinet/ip_sync.h"
106 #endif
107 #if (__FreeBSD_version >= 300000)
108 # include <sys/malloc.h>
109 #endif
110 /* END OF INCLUDES */
111 
112 #undef	SOCKADDR_IN
113 #define	SOCKADDR_IN	struct sockaddr_in
114 
115 #if !defined(lint)
116 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
118 #endif
119 
120 
121 /* ======================================================================== */
122 /* How the NAT is organised and works.                                      */
123 /*                                                                          */
124 /* Inside (interface y) NAT       Outside (interface x)                     */
125 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
127 /* ------------>         |   ------------>                                  */
128 /* src=10.1.1.1          |   src=192.1.1.1                                  */
129 /*                       |                                                  */
130 /*                       |   in, processed by fr_checknatin() for x         */
131 /* <------------         |   <------------                                  */
132 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
133 /* -------------------- -+- -------------------------------------           */
134 /* fr_checknatout() - changes ip_src and if required, sport                 */
135 /*             - creates a new mapping, if required.                        */
136 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
137 /*                                                                          */
138 /* In the NAT table, internal source is recorded as "in" and externally     */
139 /* seen as "out".                                                           */
140 /* ======================================================================== */
141 
142 
143 static	int	nat_flushtable __P((ipf_stack_t *));
144 static	int	nat_clearlist __P((ipf_stack_t *));
145 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
146 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
147 static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
148 static	void	nat_delrdr __P((struct ipnat *));
149 static	void	nat_delnat __P((struct ipnat *));
150 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
151 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
152 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
153 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
154 static	int	nat_match __P((fr_info_t *, ipnat_t *));
155 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
156 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
157 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
158 				    struct in_addr, struct in_addr, u_32_t,
159 				    ipf_stack_t *));
160 static	INLINE	int nat_icmpquerytype4 __P((int));
161 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int,
162 				    ipf_stack_t *));
163 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int,
164 				    ipf_stack_t *));
165 static	INLINE	int nat_icmperrortype4 __P((int));
166 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
167 				      tcphdr_t *, nat_t **, int));
168 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
169 static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
170 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
171 static	INLINE	int nat_wildok __P((nat_t *, int, int, int, int));
172 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
173 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
174 static	int	nat_extraflush __P((int, ipf_stack_t *));
175 static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
176 static	int	nat_flushclosing __P((int, ipf_stack_t *));
177 
178 
179 /*
180  * Below we declare a list of constants used only in the nat_extraflush()
181  * routine.  We are placing it here, instead of in nat_extraflush() itself,
182  * because we want to make it visible to tools such as mdb, nm etc., so the
183  * values can easily be altered during debugging.
184  */
185 static	const int	idletime_tab[] = {
186 	IPF_TTLVAL(30),		/* 30 seconds */
187 	IPF_TTLVAL(1800),	/* 30 minutes */
188 	IPF_TTLVAL(43200),	/* 12 hours */
189 	IPF_TTLVAL(345600),	/* 4 days */
190 };
191 
192 
193 /* ------------------------------------------------------------------------ */
194 /* Function:    fr_natinit                                                  */
195 /* Returns:     int - 0 == success, -1 == failure                           */
196 /* Parameters:  Nil                                                         */
197 /*                                                                          */
198 /* Initialise all of the NAT locks, tables and other structures.            */
199 /* ------------------------------------------------------------------------ */
200 int fr_natinit(ifs)
201 ipf_stack_t *ifs;
202 {
203 	int i;
204 
205 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
206 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
207 	if (ifs->ifs_nat_table[0] != NULL)
208 		bzero((char *)ifs->ifs_nat_table[0],
209 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
210 	else
211 		return -1;
212 
213 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
214 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
215 	if (ifs->ifs_nat_table[1] != NULL)
216 		bzero((char *)ifs->ifs_nat_table[1],
217 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
218 	else
219 		return -2;
220 
221 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
222 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
223 	if (ifs->ifs_nat_rules != NULL)
224 		bzero((char *)ifs->ifs_nat_rules,
225 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
226 	else
227 		return -3;
228 
229 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
230 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
231 	if (ifs->ifs_rdr_rules != NULL)
232 		bzero((char *)ifs->ifs_rdr_rules,
233 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
234 	else
235 		return -4;
236 
237 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
238 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
239 	if (ifs->ifs_maptable != NULL)
240 		bzero((char *)ifs->ifs_maptable,
241 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
242 	else
243 		return -5;
244 
245 	ifs->ifs_ipf_hm_maplist = NULL;
246 
247 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
248 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
249 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
250 		return -1;
251 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
252 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
253 
254 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
255 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
256 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
257 		return -1;
258 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
259 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
260 
261 	if (ifs->ifs_fr_nat_maxbucket == 0) {
262 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
263 			ifs->ifs_fr_nat_maxbucket++;
264 		ifs->ifs_fr_nat_maxbucket *= 2;
265 	}
266 
267 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
268 	/*
269 	 * Increase this because we may have "keep state" following this too
270 	 * and packet storms can occur if this is removed too quickly.
271 	 */
272 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
273 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
274 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
275 	ifs->ifs_nat_udptq.ifq_ref = 1;
276 	ifs->ifs_nat_udptq.ifq_head = NULL;
277 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
278 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
279 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
280 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
281 	ifs->ifs_nat_icmptq.ifq_ref = 1;
282 	ifs->ifs_nat_icmptq.ifq_head = NULL;
283 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
284 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
285 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
286 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
287 	ifs->ifs_nat_iptq.ifq_ref = 1;
288 	ifs->ifs_nat_iptq.ifq_head = NULL;
289 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
290 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
291 	ifs->ifs_nat_iptq.ifq_next = NULL;
292 
293 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
294 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
295 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
296 #ifdef LARGE_NAT
297 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
298 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
299 #endif
300 	}
301 
302 	/*
303 	 * Increase this because we may have "keep state" following
304 	 * this too and packet storms can occur if this is removed
305 	 * too quickly.
306 	 */
307 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
308 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
309 
310 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
311 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
312 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
313 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
314 
315 	ifs->ifs_fr_nat_init = 1;
316 
317 	return 0;
318 }
319 
320 
321 /* ------------------------------------------------------------------------ */
322 /* Function:    nat_addrdr                                                  */
323 /* Returns:     Nil                                                         */
324 /* Parameters:  n(I) - pointer to NAT rule to add                           */
325 /*                                                                          */
326 /* Adds a redirect rule to the hash table of redirect rules and the list of */
327 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
328 /* use by redirect rules.                                                   */
329 /* ------------------------------------------------------------------------ */
330 static void nat_addrdr(n, ifs)
331 ipnat_t *n;
332 ipf_stack_t *ifs;
333 {
334 	ipnat_t **np;
335 	u_32_t j;
336 	u_int hv;
337 	int k;
338 
339 	k = count4bits(n->in_outmsk);
340 	if ((k >= 0) && (k != 32))
341 		ifs->ifs_rdr_masks |= 1 << k;
342 	j = (n->in_outip & n->in_outmsk);
343 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
344 	np = ifs->ifs_rdr_rules + hv;
345 	while (*np != NULL)
346 		np = &(*np)->in_rnext;
347 	n->in_rnext = NULL;
348 	n->in_prnext = np;
349 	n->in_hv = hv;
350 	*np = n;
351 }
352 
353 
354 /* ------------------------------------------------------------------------ */
355 /* Function:    nat_addnat                                                  */
356 /* Returns:     Nil                                                         */
357 /* Parameters:  n(I) - pointer to NAT rule to add                           */
358 /*                                                                          */
359 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
360 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
361 /* redirect rules.                                                          */
362 /* ------------------------------------------------------------------------ */
363 static void nat_addnat(n, ifs)
364 ipnat_t *n;
365 ipf_stack_t *ifs;
366 {
367 	ipnat_t **np;
368 	u_32_t j;
369 	u_int hv;
370 	int k;
371 
372 	k = count4bits(n->in_inmsk);
373 	if ((k >= 0) && (k != 32))
374 		ifs->ifs_nat_masks |= 1 << k;
375 	j = (n->in_inip & n->in_inmsk);
376 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
377 	np = ifs->ifs_nat_rules + hv;
378 	while (*np != NULL)
379 		np = &(*np)->in_mnext;
380 	n->in_mnext = NULL;
381 	n->in_pmnext = np;
382 	n->in_hv = hv;
383 	*np = n;
384 }
385 
386 
387 /* ------------------------------------------------------------------------ */
388 /* Function:    nat_delrdr                                                  */
389 /* Returns:     Nil                                                         */
390 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
391 /*                                                                          */
392 /* Removes a redirect rule from the hash table of redirect rules.           */
393 /* ------------------------------------------------------------------------ */
394 static void nat_delrdr(n)
395 ipnat_t *n;
396 {
397 	if (n->in_rnext)
398 		n->in_rnext->in_prnext = n->in_prnext;
399 	*n->in_prnext = n->in_rnext;
400 }
401 
402 
403 /* ------------------------------------------------------------------------ */
404 /* Function:    nat_delnat                                                  */
405 /* Returns:     Nil                                                         */
406 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
407 /*                                                                          */
408 /* Removes a NAT map rule from the hash table of NAT map rules.             */
409 /* ------------------------------------------------------------------------ */
410 static void nat_delnat(n)
411 ipnat_t *n;
412 {
413 	if (n->in_mnext != NULL)
414 		n->in_mnext->in_pmnext = n->in_pmnext;
415 	*n->in_pmnext = n->in_mnext;
416 }
417 
418 
419 /* ------------------------------------------------------------------------ */
420 /* Function:    nat_hostmap                                                 */
421 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
422 /*                                else a pointer to the hostmapping to use  */
423 /* Parameters:  np(I)   - pointer to NAT rule                               */
424 /*              real(I) - real IP address                                   */
425 /*              map(I)  - mapped IP address                                 */
426 /*              port(I) - destination port number                           */
427 /* Write Locks: ipf_nat                                                     */
428 /*                                                                          */
429 /* Check if an ip address has already been allocated for a given mapping    */
430 /* that is not doing port based translation.  If is not yet allocated, then */
431 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
432 /* ------------------------------------------------------------------------ */
433 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
434 ipnat_t *np;
435 struct in_addr src;
436 struct in_addr dst;
437 struct in_addr map;
438 u_32_t port;
439 ipf_stack_t *ifs;
440 {
441 	hostmap_t *hm;
442 	u_int hv;
443 
444 	hv = (src.s_addr ^ dst.s_addr);
445 	hv += src.s_addr;
446 	hv += dst.s_addr;
447 	hv %= HOSTMAP_SIZE;
448 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
449 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
450 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
451 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
452 		    ((port == 0) || (port == hm->hm_port))) {
453 			hm->hm_ref++;
454 			return hm;
455 		}
456 
457 	if (np == NULL)
458 		return NULL;
459 
460 	KMALLOC(hm, hostmap_t *);
461 	if (hm) {
462 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
463 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
464 		if (ifs->ifs_ipf_hm_maplist != NULL)
465 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
466 		ifs->ifs_ipf_hm_maplist = hm;
467 
468 		hm->hm_next = ifs->ifs_maptable[hv];
469 		hm->hm_pnext = ifs->ifs_maptable + hv;
470 		if (ifs->ifs_maptable[hv] != NULL)
471 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
472 		ifs->ifs_maptable[hv] = hm;
473 		hm->hm_ipnat = np;
474 		hm->hm_srcip = src;
475 		hm->hm_dstip = dst;
476 		hm->hm_mapip = map;
477 		hm->hm_ref = 1;
478 		hm->hm_port = port;
479 	}
480 	return hm;
481 }
482 
483 
484 /* ------------------------------------------------------------------------ */
485 /* Function:    fr_hostmapdel                                              */
486 /* Returns:     Nil                                                         */
487 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
488 /* Write Locks: ipf_nat                                                     */
489 /*                                                                          */
490 /* Decrement the references to this hostmap structure by one.  If this      */
491 /* reaches zero then remove it and free it.                                 */
492 /* ------------------------------------------------------------------------ */
493 void fr_hostmapdel(hmp)
494 struct hostmap **hmp;
495 {
496 	struct hostmap *hm;
497 
498 	hm = *hmp;
499 	*hmp = NULL;
500 
501 	hm->hm_ref--;
502 	if (hm->hm_ref == 0) {
503 		if (hm->hm_next)
504 			hm->hm_next->hm_pnext = hm->hm_pnext;
505 		*hm->hm_pnext = hm->hm_next;
506 		if (hm->hm_hnext)
507 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
508 		*hm->hm_phnext = hm->hm_hnext;
509 		KFREE(hm);
510 	}
511 }
512 
513 
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum at sp by n for packets going out, using       */
/* one's complement addition.  A zero adjustment leaves it untouched.       */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;

	if (n != 0) {
		/* Work on the complement of the stored (network order) sum. */
		acc = (~ntohs(*sp)) & 0xffff;
		acc += n;
		/*
		 * Fold the carries back in.  Two folds always bring a 32bit
		 * value under 0x10000, so looping until no carry remains
		 * yields the same result.
		 */
		while ((acc >> 16) != 0)
			acc = (acc & 0xffff) + (acc >> 16);
		*sp = htons((u_short)~acc);
	}
}
540 
541 
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum at sp by n for packets going in: the low      */
/* 16 bits of ~n are added with one's complement arithmetic.  A zero        */
/* adjustment leaves the checksum untouched.                                */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc, delta;
	int fold;

	if (n == 0)
		return;

	/* Complement of the stored sum plus the complemented adjustment. */
	delta = (~n) & 0xffff;
	acc = (~ntohs(*sp)) & 0xffff;
	acc += delta;

	/* Fold the carry into the low 16 bits, twice. */
	for (fold = 0; fold < 2; fold++)
		acc = (acc & 0xffff) + (acc >> 16);

	*sp = htons((u_short)~acc);
}
568 
569 
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts, by n, a 16bit checksum located in the data section of an IP     */
/* packet.  This is only needed when NAT'ing an ICMP error message, whose   */
/* body carries the IP header of the packet that caused the error.          */
/*                                                                          */
/* fix_incksum and fix_outcksum must not be used there: to the kernel the   */
/* body of an ICMP error is plain data, so none of the special processing   */
/* (hardware checksumming, ntohs conversion) has been applied to it.        */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t total;
	u_short folded;

	if (n == 0)
		return;

	/* Complement of the stored sum, plus the adjustment. */
	total = (~ntohs(*sp)) & 0xffff;
	total += n;

	/* Two rounds of end-around carry reduce the total to 16 bits. */
	total = (total & 0xffff) + (total >> 16);
	total = (total & 0xffff) + (total >> 16);

	folded = (u_short)total;
	*sp = htons((u_short)~folded);
}
606 
607 
608 /* ------------------------------------------------------------------------ */
609 /* Function:    fr_nat_ioctl                                                */
610 /* Returns:     int - 0 == success, != 0 == failure                         */
611 /* Parameters:  data(I) - pointer to ioctl data                             */
612 /*              cmd(I)  - ioctl command integer                             */
613 /*              mode(I) - file mode bits used with open                     */
614 /*                                                                          */
615 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
616 /* ------------------------------------------------------------------------ */
617 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
618 ioctlcmd_t cmd;
619 caddr_t data;
620 int mode, uid;
621 void *ctx;
622 ipf_stack_t *ifs;
623 {
624 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
625 	int error = 0, ret, arg, getlock;
626 	ipnat_t natd;
627 
628 #if (BSD >= 199306) && defined(_KERNEL)
629 	if ((securelevel >= 2) && (mode & FWRITE))
630 		return EPERM;
631 #endif
632 
633 #if defined(__osf__) && defined(_KERNEL)
634 	getlock = 0;
635 #else
636 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
637 #endif
638 
639 	nat = NULL;     /* XXX gcc -Wuninitialized */
640 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
641 		KMALLOC(nt, ipnat_t *);
642 	} else {
643 		nt = NULL;
644 	}
645 
646 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
647 		if (mode & NAT_SYSSPACE) {
648 			bcopy(data, (char *)&natd, sizeof(natd));
649 			error = 0;
650 		} else {
651 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
652 		}
653 
654 	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
655 		BCOPYIN(data, &arg, sizeof(arg));
656 	}
657 
658 	if (error != 0)
659 		goto done;
660 
661 	/*
662 	 * For add/delete, look to see if the NAT entry is already present
663 	 */
664 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
665 		nat = &natd;
666 		if (nat->in_v == 0)	/* For backward compat. */
667 			nat->in_v = 4;
668 		nat->in_flags &= IPN_USERFLAGS;
669 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
670 			if ((nat->in_flags & IPN_SPLIT) == 0)
671 				nat->in_inip &= nat->in_inmsk;
672 			if ((nat->in_flags & IPN_IPRANGE) == 0)
673 				nat->in_outip &= nat->in_outmsk;
674 		}
675 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
676 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
677 		     np = &n->in_next)
678 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
679 			    IPN_CMPSIZ) == 0) {
680 				if (nat->in_redir == NAT_REDIRECT &&
681 				    nat->in_pnext != n->in_pnext)
682 					continue;
683 				break;
684 			}
685 	}
686 
687 	switch (cmd)
688 	{
689 	case SIOCGENITER :
690 	    {
691 		ipfgeniter_t iter;
692 		ipftoken_t *token;
693 
694 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
695 		if (error != 0)
696 			break;
697 
698 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
699 		if (token != NULL)
700 			error  = nat_iterator(token, &iter, ifs);
701 		else
702 			error = ESRCH;
703 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
704 		break;
705 	    }
706 #ifdef  IPFILTER_LOG
707 	case SIOCIPFFB :
708 	{
709 		int tmp;
710 
711 		if (!(mode & FWRITE))
712 			error = EPERM;
713 		else {
714 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
715 			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
716 		}
717 		break;
718 	}
719 	case SIOCSETLG :
720 		if (!(mode & FWRITE))
721 			error = EPERM;
722 		else {
723 			BCOPYIN((char *)data,
724 				       (char *)&ifs->ifs_nat_logging,
725 				sizeof(ifs->ifs_nat_logging));
726 		}
727 		break;
728 	case SIOCGETLG :
729 		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
730 			sizeof(ifs->ifs_nat_logging));
731 		break;
732 	case FIONREAD :
733 		arg = ifs->ifs_iplused[IPL_LOGNAT];
734 		BCOPYOUT(&arg, data, sizeof(arg));
735 		break;
736 #endif
737 	case SIOCADNAT :
738 		if (!(mode & FWRITE)) {
739 			error = EPERM;
740 		} else if (n != NULL) {
741 			error = EEXIST;
742 		} else if (nt == NULL) {
743 			error = ENOMEM;
744 		}
745 		if (error != 0) {
746 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
747 			break;
748 		}
749 		bcopy((char *)nat, (char *)nt, sizeof(*n));
750 		error = nat_siocaddnat(nt, np, getlock, ifs);
751 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
752 		if (error == 0)
753 			nt = NULL;
754 		break;
755 	case SIOCRMNAT :
756 		if (!(mode & FWRITE)) {
757 			error = EPERM;
758 			n = NULL;
759 		} else if (n == NULL) {
760 			error = ESRCH;
761 		}
762 
763 		if (error != 0) {
764 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
765 			break;
766 		}
767 		nat_siocdelnat(n, np, getlock, ifs);
768 
769 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
770 		n = NULL;
771 		break;
772 	case SIOCGNATS :
773 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
774 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
775 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
776 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
777 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
778 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
779 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
780 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
781 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
782 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
783 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
784 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
785 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
786 		break;
787 	case SIOCGNATL :
788 	    {
789 		natlookup_t nl;
790 
791 		if (getlock) {
792 			READ_ENTER(&ifs->ifs_ipf_nat);
793 		}
794 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
795 		if (error == 0) {
796 			if (nat_lookupredir(&nl, ifs) != NULL) {
797 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
798 			} else {
799 				error = ESRCH;
800 			}
801 		}
802 		if (getlock) {
803 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
804 		}
805 		break;
806 	    }
807 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
808 		if (!(mode & FWRITE)) {
809 			error = EPERM;
810 			break;
811 		}
812 		if (getlock) {
813 			WRITE_ENTER(&ifs->ifs_ipf_nat);
814 		}
815 		error = 0;
816 		if (arg == 0)
817 			ret = nat_flushtable(ifs);
818 		else if (arg == 1)
819 			ret = nat_clearlist(ifs);
820 		else if (arg >= 2 && arg <= 4)
821 			ret = nat_extraflush(arg - 2, ifs);
822 		else
823 			error = EINVAL;
824 		if (getlock) {
825 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
826 		}
827 		if (error == 0) {
828 			BCOPYOUT(&ret, data, sizeof(ret));
829 		}
830 		break;
831 	case SIOCPROXY :
832 		error = appr_ioctl(data, cmd, mode, ifs);
833 		break;
834 	case SIOCSTLCK :
835 		if (!(mode & FWRITE)) {
836 			error = EPERM;
837 		} else {
838 			fr_lock(data, &ifs->ifs_fr_nat_lock);
839 		}
840 		break;
841 	case SIOCSTPUT :
842 		if ((mode & FWRITE) != 0) {
843 			error = fr_natputent(data, getlock, ifs);
844 		} else {
845 			error = EACCES;
846 		}
847 		break;
848 	case SIOCSTGSZ :
849 		if (ifs->ifs_fr_nat_lock) {
850 			if (getlock) {
851 				READ_ENTER(&ifs->ifs_ipf_nat);
852 			}
853 			error = fr_natgetsz(data, ifs);
854 			if (getlock) {
855 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
856 			}
857 		} else
858 			error = EACCES;
859 		break;
860 	case SIOCSTGET :
861 		if (ifs->ifs_fr_nat_lock) {
862 			if (getlock) {
863 				READ_ENTER(&ifs->ifs_ipf_nat);
864 			}
865 			error = fr_natgetent(data, ifs);
866 			if (getlock) {
867 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
868 			}
869 		} else
870 			error = EACCES;
871 		break;
872 	case SIOCIPFDELTOK :
873 		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
874 		error = ipf_deltoken(arg, uid, ctx, ifs);
875 		break;
876 	default :
877 		error = EINVAL;
878 		break;
879 	}
880 done:
881 	if (nt)
882 		KFREE(nt);
883 	return error;
884 }
885 
886 
887 /* ------------------------------------------------------------------------ */
888 /* Function:    nat_siocaddnat                                              */
889 /* Returns:     int - 0 == success, != 0 == failure                         */
890 /* Parameters:  n(I)       - pointer to new NAT rule                        */
891 /*              np(I)      - pointer to where to insert new NAT rule        */
892 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
893 /* Mutex Locks: ipf_natio                                                   */
894 /*                                                                          */
895 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
896 /* from information passed to the kernel, then add it  to the appropriate   */
897 /* NAT rule table(s).                                                       */
898 /* ------------------------------------------------------------------------ */
899 static int nat_siocaddnat(n, np, getlock, ifs)
900 ipnat_t *n, **np;
901 int getlock;
902 ipf_stack_t *ifs;
903 {
904 	int error = 0, i, j;
905 
906 	if (nat_resolverule(n, ifs) != 0)
907 		return ENOENT;
908 
909 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
910 		return EINVAL;
911 
912 	n->in_use = 0;
913 	if (n->in_redir & NAT_MAPBLK)
914 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
915 	else if (n->in_flags & IPN_AUTOPORTMAP)
916 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
917 	else if (n->in_flags & IPN_IPRANGE)
918 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
919 	else if (n->in_flags & IPN_SPLIT)
920 		n->in_space = 2;
921 	else if (n->in_outmsk != 0)
922 		n->in_space = ~ntohl(n->in_outmsk);
923 	else
924 		n->in_space = 1;
925 
926 	/*
927 	 * Calculate the number of valid IP addresses in the output
928 	 * mapping range.  In all cases, the range is inclusive of
929 	 * the start and ending IP addresses.
930 	 * If to a CIDR address, lose 2: broadcast + network address
931 	 *                               (so subtract 1)
932 	 * If to a range, add one.
933 	 * If to a single IP address, set to 1.
934 	 */
935 	if (n->in_space) {
936 		if ((n->in_flags & IPN_IPRANGE) != 0)
937 			n->in_space += 1;
938 		else
939 			n->in_space -= 1;
940 	} else
941 		n->in_space = 1;
942 
943 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
944 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
945 		n->in_nip = ntohl(n->in_outip) + 1;
946 	else if ((n->in_flags & IPN_SPLIT) &&
947 		 (n->in_redir & NAT_REDIRECT))
948 		n->in_nip = ntohl(n->in_inip);
949 	else
950 		n->in_nip = ntohl(n->in_outip);
951 	if (n->in_redir & NAT_MAP) {
952 		n->in_pnext = ntohs(n->in_pmin);
953 		/*
954 		 * Multiply by the number of ports made available.
955 		 */
956 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
957 			n->in_space *= (ntohs(n->in_pmax) -
958 					ntohs(n->in_pmin) + 1);
959 			/*
960 			 * Because two different sources can map to
961 			 * different destinations but use the same
962 			 * local IP#/port #.
963 			 * If the result is smaller than in_space, then
964 			 * we may have wrapped around 32bits.
965 			 */
966 			i = n->in_inmsk;
967 			if ((i != 0) && (i != 0xffffffff)) {
968 				j = n->in_space * (~ntohl(i) + 1);
969 				if (j >= n->in_space)
970 					n->in_space = j;
971 				else
972 					n->in_space = 0xffffffff;
973 			}
974 		}
975 		/*
976 		 * If no protocol is specified, multiple by 256 to allow for
977 		 * at least one IP:IP mapping per protocol.
978 		 */
979 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
980 				j = n->in_space * 256;
981 				if (j >= n->in_space)
982 					n->in_space = j;
983 				else
984 					n->in_space = 0xffffffff;
985 		}
986 	}
987 
988 	/* Otherwise, these fields are preset */
989 
990 	if (getlock) {
991 		WRITE_ENTER(&ifs->ifs_ipf_nat);
992 	}
993 	n->in_next = NULL;
994 	*np = n;
995 
996 	if (n->in_age[0] != 0)
997 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
998 						  n->in_age[0], ifs);
999 
1000 	if (n->in_age[1] != 0)
1001 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1002 						  n->in_age[1], ifs);
1003 
1004 	if (n->in_redir & NAT_REDIRECT) {
1005 		n->in_flags &= ~IPN_NOTDST;
1006 		nat_addrdr(n, ifs);
1007 	}
1008 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1009 		n->in_flags &= ~IPN_NOTSRC;
1010 		nat_addnat(n, ifs);
1011 	}
1012 	n = NULL;
1013 	ifs->ifs_nat_stats.ns_rules++;
1014 	if (getlock) {
1015 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1016 	}
1017 
1018 	return error;
1019 }
1020 
1021 
1022 /* ------------------------------------------------------------------------ */
1023 /* Function:    nat_resolvrule                                              */
1024 /* Returns:     int - 0 == success, -1 == failure                           */
1025 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1026 /*                                                                          */
1027 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1028 /* any specified interfaces and proxy labels, and determines whether or not */
1029 /* all proxy labels are correctly specified.				    */
1030 /*									    */
1031 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1032 /* ------------------------------------------------------------------------ */
1033 static int nat_resolverule(n, ifs)
1034 ipnat_t *n;
1035 ipf_stack_t *ifs;
1036 {
1037 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1038 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs);
1039 
1040 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1041 	if (n->in_ifnames[1][0] == '\0') {
1042 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1043 		n->in_ifps[1] = n->in_ifps[0];
1044 	} else {
1045 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs);
1046 	}
1047 
1048 	if (n->in_plabel[0] != '\0') {
1049 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1050 		if (n->in_apr == NULL)
1051 			return -1;
1052 	}
1053 	return 0;
1054 }
1055 
1056 
1057 /* ------------------------------------------------------------------------ */
1058 /* Function:    nat_siocdelnat                                              */
1059 /* Returns:     int - 0 == success, != 0 == failure                         */
1060 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1061 /*              np(I)      - pointer to where to insert new NAT rule        */
1062 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1063 /* Mutex Locks: ipf_natio                                                   */
1064 /*                                                                          */
1065 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1066 /* from information passed to the kernel, then add it  to the appropriate   */
1067 /* NAT rule table(s).                                                       */
1068 /* ------------------------------------------------------------------------ */
1069 static void nat_siocdelnat(n, np, getlock, ifs)
1070 ipnat_t *n, **np;
1071 int getlock;
1072 ipf_stack_t *ifs;
1073 {
1074 	if (getlock) {
1075 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1076 	}
1077 	if (n->in_redir & NAT_REDIRECT)
1078 		nat_delrdr(n);
1079 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1080 		nat_delnat(n);
1081 	if (ifs->ifs_nat_list == NULL) {
1082 		ifs->ifs_nat_masks = 0;
1083 		ifs->ifs_rdr_masks = 0;
1084 	}
1085 
1086 	if (n->in_tqehead[0] != NULL) {
1087 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1088 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1089 		}
1090 	}
1091 
1092 	if (n->in_tqehead[1] != NULL) {
1093 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1094 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1095 		}
1096 	}
1097 
1098 	*np = n->in_next;
1099 
1100 	if (n->in_use == 0) {
1101 		if (n->in_apr)
1102 			appr_free(n->in_apr);
1103 		KFREE(n);
1104 		ifs->ifs_nat_stats.ns_rules--;
1105 	} else {
1106 		n->in_flags |= IPN_DELETE;
1107 		n->in_next = NULL;
1108 	}
1109 	if (getlock) {
1110 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1111 	}
1112 }
1113 
1114 
1115 /* ------------------------------------------------------------------------ */
1116 /* Function:    fr_natgetsz                                                 */
1117 /* Returns:     int - 0 == success, != 0 is the error value.                */
1118 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1119 /*                        get the size of.                                  */
1120 /*                                                                          */
1121 /* Handle SIOCSTGSZ.                                                        */
1122 /* Return the size of the nat list entry to be copied back to user space.   */
1123 /* The size of the entry is stored in the ng_sz field and the enture natget */
1124 /* structure is copied back to the user.                                    */
1125 /* ------------------------------------------------------------------------ */
1126 static int fr_natgetsz(data, ifs)
1127 caddr_t data;
1128 ipf_stack_t *ifs;
1129 {
1130 	ap_session_t *aps;
1131 	nat_t *nat, *n;
1132 	natget_t ng;
1133 
1134 	BCOPYIN(data, &ng, sizeof(ng));
1135 
1136 	nat = ng.ng_ptr;
1137 	if (!nat) {
1138 		nat = ifs->ifs_nat_instances;
1139 		ng.ng_sz = 0;
1140 		/*
1141 		 * Empty list so the size returned is 0.  Simple.
1142 		 */
1143 		if (nat == NULL) {
1144 			BCOPYOUT(&ng, data, sizeof(ng));
1145 			return 0;
1146 		}
1147 	} else {
1148 		/*
1149 		 * Make sure the pointer we're copying from exists in the
1150 		 * current list of entries.  Security precaution to prevent
1151 		 * copying of random kernel data.
1152 		 */
1153 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1154 			if (n == nat)
1155 				break;
1156 		if (!n)
1157 			return ESRCH;
1158 	}
1159 
1160 	/*
1161 	 * Incluse any space required for proxy data structures.
1162 	 */
1163 	ng.ng_sz = sizeof(nat_save_t);
1164 	aps = nat->nat_aps;
1165 	if (aps != NULL) {
1166 		ng.ng_sz += sizeof(ap_session_t) - 4;
1167 		if (aps->aps_data != 0)
1168 			ng.ng_sz += aps->aps_psiz;
1169 	}
1170 
1171 	BCOPYOUT(&ng, data, sizeof(ng));
1172 	return 0;
1173 }
1174 
1175 
1176 /* ------------------------------------------------------------------------ */
1177 /* Function:    fr_natgetent                                                */
1178 /* Returns:     int - 0 == success, != 0 is the error value.                */
1179 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1180 /*                        to NAT structure to copy out.                     */
1181 /*                                                                          */
1182 /* Handle SIOCSTGET.                                                        */
1183 /* Copies out NAT entry to user space.  Any additional data held for a      */
1184 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1185 /* ------------------------------------------------------------------------ */
1186 static int fr_natgetent(data, ifs)
1187 caddr_t data;
1188 ipf_stack_t *ifs;
1189 {
1190 	int error, outsize;
1191 	ap_session_t *aps;
1192 	nat_save_t *ipn, ipns;
1193 	nat_t *n, *nat;
1194 
1195 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1196 	if (error != 0)
1197 		return error;
1198 
1199 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1200 		return EINVAL;
1201 
1202 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1203 	if (ipn == NULL)
1204 		return ENOMEM;
1205 
1206 	ipn->ipn_dsize = ipns.ipn_dsize;
1207 	nat = ipns.ipn_next;
1208 	if (nat == NULL) {
1209 		nat = ifs->ifs_nat_instances;
1210 		if (nat == NULL) {
1211 			if (ifs->ifs_nat_instances == NULL)
1212 				error = ENOENT;
1213 			goto finished;
1214 		}
1215 	} else {
1216 		/*
1217 		 * Make sure the pointer we're copying from exists in the
1218 		 * current list of entries.  Security precaution to prevent
1219 		 * copying of random kernel data.
1220 		 */
1221 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1222 			if (n == nat)
1223 				break;
1224 		if (n == NULL) {
1225 			error = ESRCH;
1226 			goto finished;
1227 		}
1228 	}
1229 	ipn->ipn_next = nat->nat_next;
1230 
1231 	/*
1232 	 * Copy the NAT structure.
1233 	 */
1234 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1235 
1236 	/*
1237 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1238 	 */
1239 	if (nat->nat_ptr != NULL)
1240 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1241 		      sizeof(ipn->ipn_ipnat));
1242 
1243 	/*
1244 	 * If we also know the NAT entry has an associated filter rule,
1245 	 * save that too.
1246 	 */
1247 	if (nat->nat_fr != NULL)
1248 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1249 		      sizeof(ipn->ipn_fr));
1250 
1251 	/*
1252 	 * Last but not least, if there is an application proxy session set
1253 	 * up for this NAT entry, then copy that out too, including any
1254 	 * private data saved along side it by the proxy.
1255 	 */
1256 	aps = nat->nat_aps;
1257 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1258 	if (aps != NULL) {
1259 		char *s;
1260 
1261 		if (outsize < sizeof(*aps)) {
1262 			error = ENOBUFS;
1263 			goto finished;
1264 		}
1265 
1266 		s = ipn->ipn_data;
1267 		bcopy((char *)aps, s, sizeof(*aps));
1268 		s += sizeof(*aps);
1269 		outsize -= sizeof(*aps);
1270 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1271 			bcopy(aps->aps_data, s, aps->aps_psiz);
1272 		else
1273 			error = ENOBUFS;
1274 	}
1275 	if (error == 0) {
1276 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1277 	}
1278 
1279 finished:
1280 	if (ipn != NULL) {
1281 		KFREES(ipn, ipns.ipn_dsize);
1282 	}
1283 	return error;
1284 }
1285 
1286 
1287 /* ------------------------------------------------------------------------ */
1288 /* Function:    fr_natputent                                                */
1289 /* Returns:     int - 0 == success, != 0 is the error value.                */
1290 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1291 /*                            structure information to load into the kernel */
1292 /*              getlock(I) - flag indicating whether or not a write lock    */
1293 /*                           on ipf_nat is already held.                    */
1294 /*                                                                          */
1295 /* Handle SIOCSTPUT.                                                        */
1296 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1297 /* firewall rule data structures, if pointers to them indicate so.          */
1298 /* ------------------------------------------------------------------------ */
1299 static int fr_natputent(data, getlock, ifs)
1300 caddr_t data;
1301 int getlock;
1302 ipf_stack_t *ifs;
1303 {
1304 	nat_save_t ipn, *ipnn;
1305 	ap_session_t *aps;
1306 	nat_t *n, *nat;
1307 	frentry_t *fr;
1308 	fr_info_t fin;
1309 	ipnat_t *in;
1310 	int error;
1311 
1312 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1313 	if (error != 0)
1314 		return error;
1315 
1316 	/*
1317 	 * Trigger automatic call to nat_extraflush() if the
1318 	 * table has reached capcity specified by hi watermark.
1319 	 */
1320 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1321 		ifs->ifs_nat_doflush = 1;
1322 
1323 	/*
1324 	 * Initialise early because of code at junkput label.
1325 	 */
1326 	in = NULL;
1327 	aps = NULL;
1328 	nat = NULL;
1329 	ipnn = NULL;
1330 
1331 	/*
1332 	 * New entry, copy in the rest of the NAT entry if it's size is more
1333 	 * than just the nat_t structure.
1334 	 */
1335 	fr = NULL;
1336 	if (ipn.ipn_dsize > sizeof(ipn)) {
1337 		if (ipn.ipn_dsize > 81920) {
1338 			error = ENOMEM;
1339 			goto junkput;
1340 		}
1341 
1342 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1343 		if (ipnn == NULL)
1344 			return ENOMEM;
1345 
1346 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1347 		if (error != 0) {
1348 			error = EFAULT;
1349 			goto junkput;
1350 		}
1351 	} else
1352 		ipnn = &ipn;
1353 
1354 	KMALLOC(nat, nat_t *);
1355 	if (nat == NULL) {
1356 		error = ENOMEM;
1357 		goto junkput;
1358 	}
1359 
1360 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1361 	/*
1362 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1363 	 */
1364 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1365 	nat->nat_tqe.tqe_pnext = NULL;
1366 	nat->nat_tqe.tqe_next = NULL;
1367 	nat->nat_tqe.tqe_ifq = NULL;
1368 	nat->nat_tqe.tqe_parent = nat;
1369 
1370 	/*
1371 	 * Restore the rule associated with this nat session
1372 	 */
1373 	in = ipnn->ipn_nat.nat_ptr;
1374 	if (in != NULL) {
1375 		KMALLOC(in, ipnat_t *);
1376 		nat->nat_ptr = in;
1377 		if (in == NULL) {
1378 			error = ENOMEM;
1379 			goto junkput;
1380 		}
1381 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1382 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1383 		in->in_use = 1;
1384 		in->in_flags |= IPN_DELETE;
1385 
1386 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1387 
1388 		if (nat_resolverule(in, ifs) != 0) {
1389 			error = ESRCH;
1390 			goto junkput;
1391 		}
1392 	}
1393 
1394 	/*
1395 	 * Check that the NAT entry doesn't already exist in the kernel.
1396 	 */
1397 	bzero((char *)&fin, sizeof(fin));
1398 	fin.fin_p = nat->nat_p;
1399 	fin.fin_ifs = ifs;
1400 	if (nat->nat_dir == NAT_OUTBOUND) {
1401 		fin.fin_data[0] = ntohs(nat->nat_oport);
1402 		fin.fin_data[1] = ntohs(nat->nat_outport);
1403 		fin.fin_ifp = nat->nat_ifps[0];
1404 		if (getlock) {
1405 			READ_ENTER(&ifs->ifs_ipf_nat);
1406 		}
1407 		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1408 			nat->nat_oip, nat->nat_outip);
1409 		if (getlock) {
1410 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1411 		}
1412 		if (n != NULL) {
1413 			error = EEXIST;
1414 			goto junkput;
1415 		}
1416 	} else if (nat->nat_dir == NAT_INBOUND) {
1417 		fin.fin_data[0] = ntohs(nat->nat_inport);
1418 		fin.fin_data[1] = ntohs(nat->nat_oport);
1419 		fin.fin_ifp = nat->nat_ifps[1];
1420 		if (getlock) {
1421 			READ_ENTER(&ifs->ifs_ipf_nat);
1422 		}
1423 		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1424 			nat->nat_inip, nat->nat_oip);
1425 		if (getlock) {
1426 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1427 		}
1428 		if (n != NULL) {
1429 			error = EEXIST;
1430 			goto junkput;
1431 		}
1432 	} else {
1433 		error = EINVAL;
1434 		goto junkput;
1435 	}
1436 
1437 	/*
1438 	 * Restore ap_session_t structure.  Include the private data allocated
1439 	 * if it was there.
1440 	 */
1441 	aps = nat->nat_aps;
1442 	if (aps != NULL) {
1443 		KMALLOC(aps, ap_session_t *);
1444 		nat->nat_aps = aps;
1445 		if (aps == NULL) {
1446 			error = ENOMEM;
1447 			goto junkput;
1448 		}
1449 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1450 		if (in != NULL)
1451 			aps->aps_apr = in->in_apr;
1452 		else
1453 			aps->aps_apr = NULL;
1454 		if (aps->aps_psiz != 0) {
1455 			if (aps->aps_psiz > 81920) {
1456 				error = ENOMEM;
1457 				goto junkput;
1458 			}
1459 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1460 			if (aps->aps_data == NULL) {
1461 				error = ENOMEM;
1462 				goto junkput;
1463 			}
1464 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1465 			      aps->aps_psiz);
1466 		} else {
1467 			aps->aps_psiz = 0;
1468 			aps->aps_data = NULL;
1469 		}
1470 	}
1471 
1472 	/*
1473 	 * If there was a filtering rule associated with this entry then
1474 	 * build up a new one.
1475 	 */
1476 	fr = nat->nat_fr;
1477 	if (fr != NULL) {
1478 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1479 			KMALLOC(fr, frentry_t *);
1480 			nat->nat_fr = fr;
1481 			if (fr == NULL) {
1482 				error = ENOMEM;
1483 				goto junkput;
1484 			}
1485 			ipnn->ipn_nat.nat_fr = fr;
1486 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1487 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1488 
1489 			fr->fr_ref = 1;
1490 			fr->fr_dsize = 0;
1491 			fr->fr_data = NULL;
1492 			fr->fr_type = FR_T_NONE;
1493 
1494 			MUTEX_NUKE(&fr->fr_lock);
1495 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1496 		} else {
1497 			if (getlock) {
1498 				READ_ENTER(&ifs->ifs_ipf_nat);
1499 			}
1500 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1501 				if (n->nat_fr == fr)
1502 					break;
1503 
1504 			if (n != NULL) {
1505 				MUTEX_ENTER(&fr->fr_lock);
1506 				fr->fr_ref++;
1507 				MUTEX_EXIT(&fr->fr_lock);
1508 			}
1509 			if (getlock) {
1510 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1511 			}
1512 			if (!n) {
1513 				error = ESRCH;
1514 				goto junkput;
1515 			}
1516 		}
1517 	}
1518 
1519 	if (ipnn != &ipn) {
1520 		KFREES(ipnn, ipn.ipn_dsize);
1521 		ipnn = NULL;
1522 	}
1523 
1524 	if (getlock) {
1525 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1526 	}
1527 	error = nat_insert(nat, nat->nat_rev, ifs);
1528 	if ((error == 0) && (aps != NULL)) {
1529 		aps->aps_next = ifs->ifs_ap_sess_list;
1530 		ifs->ifs_ap_sess_list = aps;
1531 	}
1532 	if (getlock) {
1533 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1534 	}
1535 
1536 	if (error == 0)
1537 		return 0;
1538 
1539 	error = ENOMEM;
1540 
1541 junkput:
1542 	if (fr != NULL)
1543 		(void) fr_derefrule(&fr, ifs);
1544 
1545 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1546 		KFREES(ipnn, ipn.ipn_dsize);
1547 	}
1548 	if (nat != NULL) {
1549 		if (aps != NULL) {
1550 			if (aps->aps_data != NULL) {
1551 				KFREES(aps->aps_data, aps->aps_psiz);
1552 			}
1553 			KFREE(aps);
1554 		}
1555 		if (in != NULL) {
1556 			if (in->in_apr)
1557 				appr_free(in->in_apr);
1558 			KFREE(in);
1559 		}
1560 		KFREE(nat);
1561 	}
1562 	return error;
1563 }
1564 
1565 
1566 /* ------------------------------------------------------------------------ */
1567 /* Function:    nat_delete                                                  */
1568 /* Returns:     Nil                                                         */
1569 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1570 /*              logtype(I) - type of LOG record to create before deleting   */
1571 /* Write Lock:  ipf_nat                                                     */
1572 /*                                                                          */
1573 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1574 /* enabled then generate a NAT log record for this event.                   */
1575 /* ------------------------------------------------------------------------ */
1576 static void nat_delete(nat, logtype, ifs)
1577 struct nat *nat;
1578 int logtype;
1579 ipf_stack_t *ifs;
1580 {
1581 	struct ipnat *ipn;
1582 
1583 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1584 		nat_log(nat, logtype, ifs);
1585 
1586 	/*
1587 	 * Take it as a general indication that all the pointers are set if
1588 	 * nat_pnext is set.
1589 	 */
1590 	if (nat->nat_pnext != NULL) {
1591 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1592 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1593 
1594 		*nat->nat_pnext = nat->nat_next;
1595 		if (nat->nat_next != NULL) {
1596 			nat->nat_next->nat_pnext = nat->nat_pnext;
1597 			nat->nat_next = NULL;
1598 		}
1599 		nat->nat_pnext = NULL;
1600 
1601 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1602 		if (nat->nat_hnext[0] != NULL) {
1603 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1604 			nat->nat_hnext[0] = NULL;
1605 		}
1606 		nat->nat_phnext[0] = NULL;
1607 
1608 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1609 		if (nat->nat_hnext[1] != NULL) {
1610 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1611 			nat->nat_hnext[1] = NULL;
1612 		}
1613 		nat->nat_phnext[1] = NULL;
1614 
1615 		if ((nat->nat_flags & SI_WILDP) != 0)
1616 			ifs->ifs_nat_stats.ns_wilds--;
1617 	}
1618 
1619 	if (nat->nat_me != NULL) {
1620 		*nat->nat_me = NULL;
1621 		nat->nat_me = NULL;
1622 	}
1623 
1624 	fr_deletequeueentry(&nat->nat_tqe);
1625 
1626 	MUTEX_ENTER(&nat->nat_lock);
1627 	if (nat->nat_ref > 1) {
1628 		nat->nat_ref--;
1629 		MUTEX_EXIT(&nat->nat_lock);
1630 		return;
1631 	}
1632 	MUTEX_EXIT(&nat->nat_lock);
1633 
1634 	/*
1635 	 * At this point, nat_ref is 1, doing "--" would make it 0..
1636 	 */
1637 	nat->nat_ref = 0;
1638 
1639 #ifdef	IPFILTER_SYNC
1640 	if (nat->nat_sync)
1641 		ipfsync_del(nat->nat_sync);
1642 #endif
1643 
1644 	if (nat->nat_fr != NULL)
1645 		(void)fr_derefrule(&nat->nat_fr, ifs);
1646 
1647 	if (nat->nat_hm != NULL)
1648 		fr_hostmapdel(&nat->nat_hm);
1649 
1650 	/*
1651 	 * If there is an active reference from the nat entry to its parent
1652 	 * rule, decrement the rule's reference count and free it too if no
1653 	 * longer being used.
1654 	 */
1655 	ipn = nat->nat_ptr;
1656 	if (ipn != NULL) {
1657 		ipn->in_space++;
1658 		ipn->in_use--;
1659 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1660 			if (ipn->in_apr)
1661 				appr_free(ipn->in_apr);
1662 			KFREE(ipn);
1663 			ifs->ifs_nat_stats.ns_rules--;
1664 		}
1665 	}
1666 
1667 	MUTEX_DESTROY(&nat->nat_lock);
1668 
1669 	aps_free(nat->nat_aps, ifs);
1670 	ifs->ifs_nat_stats.ns_inuse--;
1671 
1672 	/*
1673 	 * If there's a fragment table entry too for this nat entry, then
1674 	 * dereference that as well.  This is after nat_lock is released
1675 	 * because of Tru64.
1676 	 */
1677 	fr_forgetnat((void *)nat, ifs);
1678 
1679 	KFREE(nat);
1680 }
1681 
1682 
1683 /* ------------------------------------------------------------------------ */
1684 /* Function:    nat_flushtable                                              */
1685 /* Returns:     int - number of NAT rules deleted                           */
1686 /* Parameters:  Nil                                                         */
1687 /*                                                                          */
1688 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1689 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1690 /* ------------------------------------------------------------------------ */
1691 /*
1692  * nat_flushtable - clear the NAT table of all mapping entries.
1693  */
1694 static int nat_flushtable(ifs)
1695 ipf_stack_t *ifs;
1696 {
1697 	nat_t *nat;
1698 	int j = 0;
1699 
1700 	/*
1701 	 * ALL NAT mappings deleted, so lets just make the deletions
1702 	 * quicker.
1703 	 */
1704 	if (ifs->ifs_nat_table[0] != NULL)
1705 		bzero((char *)ifs->ifs_nat_table[0],
1706 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
1707 	if (ifs->ifs_nat_table[1] != NULL)
1708 		bzero((char *)ifs->ifs_nat_table[1],
1709 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
1710 
1711 	while ((nat = ifs->ifs_nat_instances) != NULL) {
1712 		nat_delete(nat, NL_FLUSH, ifs);
1713 		j++;
1714 	}
1715 
1716 	ifs->ifs_nat_stats.ns_inuse = 0;
1717 	return j;
1718 }
1719 
1720 
1721 /* ------------------------------------------------------------------------ */
1722 /* Function:    nat_clearlist                                               */
1723 /* Returns:     int - number of NAT/RDR rules deleted                       */
1724 /* Parameters:  Nil                                                         */
1725 /*                                                                          */
1726 /* Delete all rules in the current list of rules.  There is nothing elegant */
1727 /* about this cleanup: simply free all entries on the list of rules and     */
1728 /* clear out the tables used for hashed NAT rule lookups.                   */
1729 /* ------------------------------------------------------------------------ */
1730 static int nat_clearlist(ifs)
1731 ipf_stack_t *ifs;
1732 {
1733 	ipnat_t *n, **np = &ifs->ifs_nat_list;
1734 	int i = 0;
1735 
1736 	if (ifs->ifs_nat_rules != NULL)
1737 		bzero((char *)ifs->ifs_nat_rules,
1738 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
1739 	if (ifs->ifs_rdr_rules != NULL)
1740 		bzero((char *)ifs->ifs_rdr_rules,
1741 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
1742 
1743 	while ((n = *np) != NULL) {
1744 		*np = n->in_next;
1745 		if (n->in_use == 0) {
1746 			if (n->in_apr != NULL)
1747 				appr_free(n->in_apr);
1748 			KFREE(n);
1749 			ifs->ifs_nat_stats.ns_rules--;
1750 		} else {
1751 			n->in_flags |= IPN_DELETE;
1752 			n->in_next = NULL;
1753 		}
1754 		i++;
1755 	}
1756 	ifs->ifs_nat_masks = 0;
1757 	ifs->ifs_rdr_masks = 0;
1758 	return i;
1759 }
1760 
1761 
1762 /* ------------------------------------------------------------------------ */
1763 /* Function:    nat_newmap                                                  */
1764 /* Returns:     int - -1 == error, 0 == success                             */
1765 /* Parameters:  fin(I) - pointer to packet information                      */
1766 /*              nat(I) - pointer to NAT entry                               */
1767 /*              ni(I)  - pointer to structure with misc. information needed */
1768 /*                       to create new NAT entry.                           */
1769 /*                                                                          */
1770 /* Given an empty NAT structure, populate it with new information about a   */
1771 /* new NAT session, as defined by the matching NAT rule.                    */
1772 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1773 /* to the new IP address for the translation.                               */
1774 /* ------------------------------------------------------------------------ */
1775 static INLINE int nat_newmap(fin, nat, ni)
1776 fr_info_t *fin;
1777 nat_t *nat;
1778 natinfo_t *ni;
1779 {
1780 	u_short st_port, dport, sport, port, sp, dp;
1781 	struct in_addr in, inb;
1782 	hostmap_t *hm;
1783 	u_32_t flags;
1784 	u_32_t st_ip;
1785 	ipnat_t *np;
1786 	nat_t *natl;
1787 	int l;
1788 	ipf_stack_t *ifs = fin->fin_ifs;
1789 
1790 	/*
1791 	 * If it's an outbound packet which doesn't match any existing
1792 	 * record, then create a new port
1793 	 */
1794 	l = 0;
1795 	hm = NULL;
1796 	np = ni->nai_np;
1797 	st_ip = np->in_nip;
1798 	st_port = np->in_pnext;
1799 	flags = ni->nai_flags;
1800 	sport = ni->nai_sport;
1801 	dport = ni->nai_dport;
1802 
1803 	/*
1804 	 * Do a loop until we either run out of entries to try or we find
1805 	 * a NAT mapping that isn't currently being used.  This is done
1806 	 * because the change to the source is not (usually) being fixed.
1807 	 */
1808 	do {
1809 		port = 0;
1810 		in.s_addr = htonl(np->in_nip);
1811 		if (l == 0) {
1812 			/*
1813 			 * Check to see if there is an existing NAT
1814 			 * setup for this IP address pair.
1815 			 */
1816 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1817 					 in, 0, ifs);
1818 			if (hm != NULL)
1819 				in.s_addr = hm->hm_mapip.s_addr;
1820 		} else if ((l == 1) && (hm != NULL)) {
1821 			fr_hostmapdel(&hm);
1822 		}
1823 		in.s_addr = ntohl(in.s_addr);
1824 
1825 		nat->nat_hm = hm;
1826 
1827 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1828 			if (l > 0)
1829 				return -1;
1830 		}
1831 
1832 		if (np->in_redir == NAT_BIMAP &&
1833 		    np->in_inmsk == np->in_outmsk) {
1834 			/*
1835 			 * map the address block in a 1:1 fashion
1836 			 */
1837 			in.s_addr = np->in_outip;
1838 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1839 			in.s_addr = ntohl(in.s_addr);
1840 
1841 		} else if (np->in_redir & NAT_MAPBLK) {
1842 			if ((l >= np->in_ppip) || ((l > 0) &&
1843 			     !(flags & IPN_TCPUDP)))
1844 				return -1;
1845 			/*
1846 			 * map-block - Calculate destination address.
1847 			 */
1848 			in.s_addr = ntohl(fin->fin_saddr);
1849 			in.s_addr &= ntohl(~np->in_inmsk);
1850 			inb.s_addr = in.s_addr;
1851 			in.s_addr /= np->in_ippip;
1852 			in.s_addr &= ntohl(~np->in_outmsk);
1853 			in.s_addr += ntohl(np->in_outip);
1854 			/*
1855 			 * Calculate destination port.
1856 			 */
1857 			if ((flags & IPN_TCPUDP) &&
1858 			    (np->in_ppip != 0)) {
1859 				port = ntohs(sport) + l;
1860 				port %= np->in_ppip;
1861 				port += np->in_ppip *
1862 					(inb.s_addr % np->in_ippip);
1863 				port += MAPBLK_MINPORT;
1864 				port = htons(port);
1865 			}
1866 
1867 		} else if ((np->in_outip == 0) &&
1868 			   (np->in_outmsk == 0xffffffff)) {
1869 			/*
1870 			 * 0/32 - use the interface's IP address.
1871 			 */
1872 			if ((l > 0) ||
1873 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1874 				       &in, NULL, fin->fin_ifs) == -1)
1875 				return -1;
1876 			in.s_addr = ntohl(in.s_addr);
1877 
1878 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1879 			/*
1880 			 * 0/0 - use the original source address/port.
1881 			 */
1882 			if (l > 0)
1883 				return -1;
1884 			in.s_addr = ntohl(fin->fin_saddr);
1885 
1886 		} else if ((np->in_outmsk != 0xffffffff) &&
1887 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1888 			np->in_nip++;
1889 
1890 		natl = NULL;
1891 
1892 		if ((flags & IPN_TCPUDP) &&
1893 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
1894 		    (np->in_flags & IPN_AUTOPORTMAP)) {
1895 			/*
1896 			 * "ports auto" (without map-block)
1897 			 */
1898 			if ((l > 0) && (l % np->in_ppip == 0)) {
1899 				if (l > np->in_space) {
1900 					return -1;
1901 				} else if ((l > np->in_ppip) &&
1902 					   np->in_outmsk != 0xffffffff)
1903 					np->in_nip++;
1904 			}
1905 			if (np->in_ppip != 0) {
1906 				port = ntohs(sport);
1907 				port += (l % np->in_ppip);
1908 				port %= np->in_ppip;
1909 				port += np->in_ppip *
1910 					(ntohl(fin->fin_saddr) %
1911 					 np->in_ippip);
1912 				port += MAPBLK_MINPORT;
1913 				port = htons(port);
1914 			}
1915 
1916 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1917 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
1918 			/*
1919 			 * Standard port translation.  Select next port.
1920 			 */
1921 			port = htons(np->in_pnext++);
1922 
1923 			if (np->in_pnext > ntohs(np->in_pmax)) {
1924 				np->in_pnext = ntohs(np->in_pmin);
1925 				if (np->in_outmsk != 0xffffffff)
1926 					np->in_nip++;
1927 			}
1928 		}
1929 
1930 		if (np->in_flags & IPN_IPRANGE) {
1931 			if (np->in_nip > ntohl(np->in_outmsk))
1932 				np->in_nip = ntohl(np->in_outip);
1933 		} else {
1934 			if ((np->in_outmsk != 0xffffffff) &&
1935 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1936 			    ntohl(np->in_outip))
1937 				np->in_nip = ntohl(np->in_outip) + 1;
1938 		}
1939 
1940 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
1941 			port = sport;
1942 
1943 		/*
1944 		 * Here we do a lookup of the connection as seen from
1945 		 * the outside.  If an IP# pair already exists, try
1946 		 * again.  So if you have A->B becomes C->B, you can
1947 		 * also have D->E become C->E but not D->B causing
1948 		 * another C->B.  Also take protocol and ports into
1949 		 * account when determining whether a pre-existing
1950 		 * NAT setup will cause an external conflict where
1951 		 * this is appropriate.
1952 		 */
1953 		inb.s_addr = htonl(in.s_addr);
1954 		sp = fin->fin_data[0];
1955 		dp = fin->fin_data[1];
1956 		fin->fin_data[0] = fin->fin_data[1];
1957 		fin->fin_data[1] = htons(port);
1958 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
1959 				    (u_int)fin->fin_p, fin->fin_dst, inb);
1960 		fin->fin_data[0] = sp;
1961 		fin->fin_data[1] = dp;
1962 
1963 		/*
1964 		 * Has the search wrapped around and come back to the
1965 		 * start ?
1966 		 */
1967 		if ((natl != NULL) &&
1968 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1969 		    (np->in_nip != 0) && (st_ip == np->in_nip))
1970 			return -1;
1971 		l++;
1972 	} while (natl != NULL);
1973 
1974 	if (np->in_space > 0)
1975 		np->in_space--;
1976 
1977 	/* Setup the NAT table */
1978 	nat->nat_inip = fin->fin_src;
1979 	nat->nat_outip.s_addr = htonl(in.s_addr);
1980 	nat->nat_oip = fin->fin_dst;
1981 	if (nat->nat_hm == NULL)
1982 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1983 					  nat->nat_outip, 0, ifs);
1984 
1985 	/*
1986 	 * The ICMP checksum does not have a pseudo header containing
1987 	 * the IP addresses
1988 	 */
1989 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1990 	ni->nai_sum2 = LONG_SUM(in.s_addr);
1991 	if ((flags & IPN_TCPUDP)) {
1992 		ni->nai_sum1 += ntohs(sport);
1993 		ni->nai_sum2 += ntohs(port);
1994 	}
1995 
1996 	if (flags & IPN_TCPUDP) {
1997 		nat->nat_inport = sport;
1998 		nat->nat_outport = port;	/* sport */
1999 		nat->nat_oport = dport;
2000 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2001 	} else if (flags & IPN_ICMPQUERY) {
2002 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2003 		nat->nat_inport = port;
2004 		nat->nat_outport = port;
2005 	}
2006 
2007 	ni->nai_ip.s_addr = in.s_addr;
2008 	ni->nai_port = port;
2009 	ni->nai_nport = dport;
2010 	return 0;
2011 }
2012 
2013 
2014 /* ------------------------------------------------------------------------ */
2015 /* Function:    nat_newrdr                                                  */
2016 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2017 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2018 /* Parameters:  fin(I) - pointer to packet information                      */
2019 /*              nat(I) - pointer to NAT entry                               */
2020 /*              ni(I)  - pointer to structure with misc. information needed */
2021 /*                       to create new NAT entry.                           */
2022 /*                                                                          */
2023 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2024 /* to the new IP address for the translation.                               */
2025 /* ------------------------------------------------------------------------ */
2026 static INLINE int nat_newrdr(fin, nat, ni)
2027 fr_info_t *fin;
2028 nat_t *nat;
2029 natinfo_t *ni;
2030 {
2031 	u_short nport, dport, sport;
2032 	struct in_addr in, inb;
2033 	u_short sp, dp;
2034 	hostmap_t *hm;
2035 	u_32_t flags;
2036 	ipnat_t *np;
2037 	nat_t *natl;
2038 	int move;
2039 	ipf_stack_t *ifs = fin->fin_ifs;
2040 
2041 	move = 1;
2042 	hm = NULL;
2043 	in.s_addr = 0;
2044 	np = ni->nai_np;
2045 	flags = ni->nai_flags;
2046 	sport = ni->nai_sport;
2047 	dport = ni->nai_dport;
2048 
2049 	/*
2050 	 * If the matching rule has IPN_STICKY set, then we want to have the
2051 	 * same rule kick in as before.  Why would this happen?  If you have
2052 	 * a collection of rdr rules with "round-robin sticky", the current
2053 	 * packet might match a different one to the previous connection but
2054 	 * we want the same destination to be used.
2055 	 */
2056 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2057 	    (IPN_ROUNDR|IPN_STICKY)) {
2058 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2059 				 (u_32_t)dport, ifs);
2060 		if (hm != NULL) {
2061 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2062 			np = hm->hm_ipnat;
2063 			ni->nai_np = np;
2064 			move = 0;
2065 		}
2066 	}
2067 
2068 	/*
2069 	 * Otherwise, it's an inbound packet. Most likely, we don't
2070 	 * want to rewrite source ports and source addresses. Instead,
2071 	 * we want to rewrite to a fixed internal address and fixed
2072 	 * internal port.
2073 	 */
2074 	if (np->in_flags & IPN_SPLIT) {
2075 		in.s_addr = np->in_nip;
2076 
2077 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2078 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2079 					 in, (u_32_t)dport, ifs);
2080 			if (hm != NULL) {
2081 				in.s_addr = hm->hm_mapip.s_addr;
2082 				move = 0;
2083 			}
2084 		}
2085 
2086 		if (hm == NULL || hm->hm_ref == 1) {
2087 			if (np->in_inip == htonl(in.s_addr)) {
2088 				np->in_nip = ntohl(np->in_inmsk);
2089 				move = 0;
2090 			} else {
2091 				np->in_nip = ntohl(np->in_inip);
2092 			}
2093 		}
2094 
2095 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2096 		/*
2097 		 * 0/32 - use the interface's IP address.
2098 		 */
2099 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2100 			   fin->fin_ifs) == -1)
2101 			return -1;
2102 		in.s_addr = ntohl(in.s_addr);
2103 
2104 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2105 		/*
2106 		 * 0/0 - use the original destination address/port.
2107 		 */
2108 		in.s_addr = ntohl(fin->fin_daddr);
2109 
2110 	} else if (np->in_redir == NAT_BIMAP &&
2111 		   np->in_inmsk == np->in_outmsk) {
2112 		/*
2113 		 * map the address block in a 1:1 fashion
2114 		 */
2115 		in.s_addr = np->in_inip;
2116 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2117 		in.s_addr = ntohl(in.s_addr);
2118 	} else {
2119 		in.s_addr = ntohl(np->in_inip);
2120 	}
2121 
2122 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2123 		nport = dport;
2124 	else {
2125 		/*
2126 		 * Whilst not optimized for the case where
2127 		 * pmin == pmax, the gain is not significant.
2128 		 */
2129 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2130 		    (np->in_pmin != np->in_pmax)) {
2131 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2132 				ntohs(np->in_pnext);
2133 			nport = htons(nport);
2134 		} else
2135 			nport = np->in_pnext;
2136 	}
2137 
2138 	/*
2139 	 * When the redirect-to address is set to 0.0.0.0, just
2140 	 * assume a blank `forwarding' of the packet.  We don't
2141 	 * setup any translation for this either.
2142 	 */
2143 	if (in.s_addr == 0) {
2144 		if (nport == dport)
2145 			return -1;
2146 		in.s_addr = ntohl(fin->fin_daddr);
2147 	}
2148 
2149 	/*
2150 	 * Check to see if this redirect mapping already exists and if
2151 	 * it does, return "failure" (allowing it to be created will just
2152 	 * cause one or both of these "connections" to stop working.)
2153 	 */
2154 	inb.s_addr = htonl(in.s_addr);
2155 	sp = fin->fin_data[0];
2156 	dp = fin->fin_data[1];
2157 	fin->fin_data[1] = fin->fin_data[0];
2158 	fin->fin_data[0] = ntohs(nport);
2159 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2160 		    (u_int)fin->fin_p, inb, fin->fin_src);
2161 	fin->fin_data[0] = sp;
2162 	fin->fin_data[1] = dp;
2163 	if (natl != NULL)
2164 		return (-1);
2165 
2166 	nat->nat_inip.s_addr = htonl(in.s_addr);
2167 	nat->nat_outip = fin->fin_dst;
2168 	nat->nat_oip = fin->fin_src;
2169 
2170 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2171 	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2172 
2173 	ni->nai_ip.s_addr = in.s_addr;
2174 	ni->nai_nport = nport;
2175 	ni->nai_port = sport;
2176 
2177 	if (flags & IPN_TCPUDP) {
2178 		nat->nat_inport = nport;
2179 		nat->nat_outport = dport;
2180 		nat->nat_oport = sport;
2181 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2182 	} else if (flags & IPN_ICMPQUERY) {
2183 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2184 		nat->nat_inport = nport;
2185 		nat->nat_outport = nport;
2186 	}
2187 
2188 	return move;
2189 }
2190 
2191 /* ------------------------------------------------------------------------ */
2192 /* Function:    nat_new                                                     */
2193 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2194 /*                       else pointer to new NAT structure                  */
2195 /* Parameters:  fin(I)       - pointer to packet information                */
2196 /*              np(I)        - pointer to NAT rule                          */
2197 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2198 /*              flags(I)     - flags describing the current packet          */
2199 /*              direction(I) - direction of packet (in/out)                 */
2200 /* Write Lock:  ipf_nat                                                     */
2201 /*                                                                          */
2202 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2203 /* in any way.                                                              */
2204 /*                                                                          */
2205 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2206 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2207 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2208 /* and (3) building that structure and putting it into the NAT table(s).    */
2209 /* ------------------------------------------------------------------------ */
2210 nat_t *nat_new(fin, np, natsave, flags, direction)
2211 fr_info_t *fin;
2212 ipnat_t *np;
2213 nat_t **natsave;
2214 u_int flags;
2215 int direction;
2216 {
2217 	u_short port = 0, sport = 0, dport = 0, nport = 0;
2218 	tcphdr_t *tcp = NULL;
2219 	hostmap_t *hm = NULL;
2220 	struct in_addr in;
2221 	nat_t *nat, *natl;
2222 	u_int nflags;
2223 	natinfo_t ni;
2224 	u_32_t sumd;
2225 	int move;
2226 	ipf_stack_t *ifs = fin->fin_ifs;
2227 
2228 	/*
2229 	 * Trigger automatic call to nat_extraflush() if the
2230 	 * table has reached capcity specified by hi watermark.
2231 	 */
2232 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2233 		ifs->ifs_nat_doflush = 1;
2234 
2235 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2236 		ifs->ifs_nat_stats.ns_memfail++;
2237 		return NULL;
2238 	}
2239 
2240 	move = 1;
2241 	nflags = np->in_flags & flags;
2242 	nflags &= NAT_FROMRULE;
2243 
2244 	ni.nai_np = np;
2245 	ni.nai_nflags = nflags;
2246 	ni.nai_flags = flags;
2247 
2248 	/* Give me a new nat */
2249 	KMALLOC(nat, nat_t *);
2250 	if (nat == NULL) {
2251 		ifs->ifs_nat_stats.ns_memfail++;
2252 		/*
2253 		 * Try to automatically tune the max # of entries in the
2254 		 * table allowed to be less than what will cause kmem_alloc()
2255 		 * to fail and try to eliminate panics due to out of memory
2256 		 * conditions arising.
2257 		 */
2258 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2259 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2260 			printf("ipf_nattable_max reduced to %d\n",
2261 				ifs->ifs_ipf_nattable_max);
2262 		}
2263 		return NULL;
2264 	}
2265 
2266 	if (flags & IPN_TCPUDP) {
2267 		tcp = fin->fin_dp;
2268 		ni.nai_sport = htons(fin->fin_sport);
2269 		ni.nai_dport = htons(fin->fin_dport);
2270 	} else if (flags & IPN_ICMPQUERY) {
2271 		/*
2272 		 * In the ICMP query NAT code, we translate the ICMP id fields
2273 		 * to make them unique. This is indepedent of the ICMP type
2274 		 * (e.g. in the unlikely event that a host sends an echo and
2275 		 * an tstamp request with the same id, both packets will have
2276 		 * their ip address/id field changed in the same way).
2277 		 */
2278 		/* The icmp_id field is used by the sender to identify the
2279 		 * process making the icmp request. (the receiver justs
2280 		 * copies it back in its response). So, it closely matches
2281 		 * the concept of source port. We overlay sport, so we can
2282 		 * maximally reuse the existing code.
2283 		 */
2284 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2285 		ni.nai_dport = ni.nai_sport;
2286 	}
2287 
2288 	bzero((char *)nat, sizeof(*nat));
2289 	nat->nat_flags = flags;
2290 	nat->nat_redir = np->in_redir;
2291 
2292 	if ((flags & NAT_SLAVE) == 0) {
2293 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2294 	}
2295 
2296 	/*
2297 	 * Search the current table for a match.
2298 	 */
2299 	if (direction == NAT_OUTBOUND) {
2300 		/*
2301 		 * We can now arrange to call this for the same connection
2302 		 * because ipf_nat_new doesn't protect the code path into
2303 		 * this function.
2304 		 */
2305 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2306 				     fin->fin_src, fin->fin_dst);
2307 		if (natl != NULL) {
2308 			KFREE(nat);
2309 			nat = natl;
2310 			goto done;
2311 		}
2312 
2313 		move = nat_newmap(fin, nat, &ni);
2314 		if (move == -1)
2315 			goto badnat;
2316 
2317 		np = ni.nai_np;
2318 		in = ni.nai_ip;
2319 	} else {
2320 		/*
2321 		 * NAT_INBOUND is used only for redirects rules
2322 		 */
2323 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2324 				    fin->fin_src, fin->fin_dst);
2325 		if (natl != NULL) {
2326 			KFREE(nat);
2327 			nat = natl;
2328 			goto done;
2329 		}
2330 
2331 		move = nat_newrdr(fin, nat, &ni);
2332 		if (move == -1)
2333 			goto badnat;
2334 
2335 		np = ni.nai_np;
2336 		in = ni.nai_ip;
2337 	}
2338 	port = ni.nai_port;
2339 	nport = ni.nai_nport;
2340 
2341 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2342 		if (np->in_redir == NAT_REDIRECT) {
2343 			nat_delrdr(np);
2344 			nat_addrdr(np, ifs);
2345 		} else if (np->in_redir == NAT_MAP) {
2346 			nat_delnat(np);
2347 			nat_addnat(np, ifs);
2348 		}
2349 	}
2350 
2351 	if (flags & IPN_TCPUDP) {
2352 		sport = ni.nai_sport;
2353 		dport = ni.nai_dport;
2354 	} else if (flags & IPN_ICMPQUERY) {
2355 		sport = ni.nai_sport;
2356 		dport = 0;
2357 	}
2358 
2359 	/*
2360 	 * nat_sumd[0] stores adjustment value including both IP address and
2361 	 * port number changes. nat_sumd[1] stores adjustment value only for
2362 	 * IP address changes, to be used for pseudo header adjustment, in
2363 	 * case hardware partial checksum offload is offered.
2364 	 */
2365 	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2366 	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2367 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
2368 	if (flags & IPN_TCPUDP) {
2369 		ni.nai_sum1 = LONG_SUM(in.s_addr);
2370 		if (direction == NAT_OUTBOUND)
2371 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_saddr));
2372 		else
2373 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_daddr));
2374 
2375 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2376 		nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
2377 	} else
2378 #endif
2379 		nat->nat_sumd[1] = nat->nat_sumd[0];
2380 
2381 	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2382 		if (direction == NAT_OUTBOUND)
2383 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2384 		else
2385 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2386 
2387 		ni.nai_sum2 = LONG_SUM(in.s_addr);
2388 
2389 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2390 		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2391 	} else {
2392 		nat->nat_ipsumd = nat->nat_sumd[0];
2393 		if (!(flags & IPN_TCPUDPICMP)) {
2394 			nat->nat_sumd[0] = 0;
2395 			nat->nat_sumd[1] = 0;
2396 		}
2397 	}
2398 
2399 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2400 		goto badnat;
2401 	}
2402 	if (flags & SI_WILDP)
2403 		ifs->ifs_nat_stats.ns_wilds++;
2404 	goto done;
2405 badnat:
2406 	ifs->ifs_nat_stats.ns_badnat++;
2407 	if ((hm = nat->nat_hm) != NULL)
2408 		fr_hostmapdel(&hm);
2409 	KFREE(nat);
2410 	nat = NULL;
2411 done:
2412 	if ((flags & NAT_SLAVE) == 0) {
2413 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2414 	}
2415 	return nat;
2416 }
2417 
2418 
2419 /* ------------------------------------------------------------------------ */
2420 /* Function:    nat_finalise                                                */
2421 /* Returns:     int - 0 == sucess, -1 == failure                            */
2422 /* Parameters:  fin(I) - pointer to packet information                      */
2423 /*              nat(I) - pointer to NAT entry                               */
2424 /*              ni(I)  - pointer to structure with misc. information needed */
2425 /*                       to create new NAT entry.                           */
2426 /* Write Lock:  ipf_nat                                                     */
2427 /*                                                                          */
2428 /* This is the tail end of constructing a new NAT entry and is the same     */
2429 /* for both IPv4 and IPv6.                                                  */
2430 /* ------------------------------------------------------------------------ */
2431 /*ARGSUSED*/
2432 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2433 fr_info_t *fin;
2434 nat_t *nat;
2435 natinfo_t *ni;
2436 tcphdr_t *tcp;
2437 nat_t **natsave;
2438 int direction;
2439 {
2440 	frentry_t *fr;
2441 	ipnat_t *np;
2442 	ipf_stack_t *ifs = fin->fin_ifs;
2443 
2444 	np = ni->nai_np;
2445 
2446 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2447 
2448 #ifdef	IPFILTER_SYNC
2449 	if ((nat->nat_flags & SI_CLONE) == 0)
2450 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2451 #endif
2452 
2453 	nat->nat_me = natsave;
2454 	nat->nat_dir = direction;
2455 	nat->nat_ifps[0] = np->in_ifps[0];
2456 	nat->nat_ifps[1] = np->in_ifps[1];
2457 	nat->nat_ptr = np;
2458 	nat->nat_p = fin->fin_p;
2459 	nat->nat_mssclamp = np->in_mssclamp;
2460 	fr = fin->fin_fr;
2461 	nat->nat_fr = fr;
2462 
2463 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2464 		if (appr_new(fin, nat) == -1)
2465 			return -1;
2466 
2467 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2468 		if (ifs->ifs_nat_logging)
2469 			nat_log(nat, (u_int)np->in_redir, ifs);
2470 		np->in_use++;
2471 		if (fr != NULL) {
2472 			MUTEX_ENTER(&fr->fr_lock);
2473 			fr->fr_ref++;
2474 			MUTEX_EXIT(&fr->fr_lock);
2475 		}
2476 		return 0;
2477 	}
2478 
2479 	/*
2480 	 * nat_insert failed, so cleanup time...
2481 	 */
2482 	return -1;
2483 }
2484 
2485 
2486 /* ------------------------------------------------------------------------ */
2487 /* Function:   nat_insert                                                   */
2488 /* Returns:    int - 0 == sucess, -1 == failure                             */
2489 /* Parameters: nat(I) - pointer to NAT structure                            */
2490 /*             rev(I) - flag indicating forward/reverse direction of packet */
2491 /* Write Lock: ipf_nat                                                      */
2492 /*                                                                          */
2493 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2494 /* list of active NAT entries.  Adjust global counters when complete.       */
2495 /* ------------------------------------------------------------------------ */
2496 int	nat_insert(nat, rev, ifs)
2497 nat_t	*nat;
2498 int	rev;
2499 ipf_stack_t *ifs;
2500 {
2501 	u_int hv1, hv2;
2502 	nat_t **natp;
2503 
2504 	/*
2505 	 * Try and return an error as early as possible, so calculate the hash
2506 	 * entry numbers first and then proceed.
2507 	 */
2508 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2509 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2510 				  0xffffffff);
2511 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2512 				  ifs->ifs_ipf_nattable_sz);
2513 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2514 				  0xffffffff);
2515 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2516 				  ifs->ifs_ipf_nattable_sz);
2517 	} else {
2518 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2519 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2520 				  ifs->ifs_ipf_nattable_sz);
2521 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2522 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2523 				  ifs->ifs_ipf_nattable_sz);
2524 	}
2525 
2526 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2527 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2528 		return -1;
2529 	}
2530 
2531 	nat->nat_hv[0] = hv1;
2532 	nat->nat_hv[1] = hv2;
2533 
2534 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2535 
2536 	nat->nat_rev = rev;
2537 	nat->nat_ref = 1;
2538 	nat->nat_bytes[0] = 0;
2539 	nat->nat_pkts[0] = 0;
2540 	nat->nat_bytes[1] = 0;
2541 	nat->nat_pkts[1] = 0;
2542 
2543 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2544 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2545 
2546 	if (nat->nat_ifnames[1][0] !='\0') {
2547 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2548 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2549 	} else {
2550 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2551 			       LIFNAMSIZ);
2552 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2553 		nat->nat_ifps[1] = nat->nat_ifps[0];
2554 	}
2555 
2556 	nat->nat_next = ifs->ifs_nat_instances;
2557 	nat->nat_pnext = &ifs->ifs_nat_instances;
2558 	if (ifs->ifs_nat_instances)
2559 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2560 	ifs->ifs_nat_instances = nat;
2561 
2562 	natp = &ifs->ifs_nat_table[0][hv1];
2563 	if (*natp)
2564 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2565 	nat->nat_phnext[0] = natp;
2566 	nat->nat_hnext[0] = *natp;
2567 	*natp = nat;
2568 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2569 
2570 	natp = &ifs->ifs_nat_table[1][hv2];
2571 	if (*natp)
2572 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2573 	nat->nat_phnext[1] = natp;
2574 	nat->nat_hnext[1] = *natp;
2575 	*natp = nat;
2576 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2577 
2578 	fr_setnatqueue(nat, rev, ifs);
2579 
2580 	ifs->ifs_nat_stats.ns_added++;
2581 	ifs->ifs_nat_stats.ns_inuse++;
2582 	return 0;
2583 }
2584 
2585 
2586 /* ------------------------------------------------------------------------ */
2587 /* Function:    nat_icmperrorlookup                                         */
2588 /* Returns:     nat_t* - point to matching NAT structure                    */
2589 /* Parameters:  fin(I) - pointer to packet information                      */
2590 /*              dir(I) - direction of packet (in/out)                       */
2591 /*                                                                          */
2592 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2593 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2594 /* the required length.                                                     */
2595 /* ------------------------------------------------------------------------ */
2596 nat_t *nat_icmperrorlookup(fin, dir)
2597 fr_info_t *fin;
2598 int dir;
2599 {
2600 	int flags = 0, minlen;
2601 	icmphdr_t *orgicmp;
2602 	tcphdr_t *tcp = NULL;
2603 	u_short data[2];
2604 	nat_t *nat;
2605 	ip_t *oip;
2606 	u_int p;
2607 
2608 	/*
2609 	 * Does it at least have the return (basic) IP header ?
2610 	 * Only a basic IP header (no options) should be with an ICMP error
2611 	 * header.  Also, if it's not an error type, then return.
2612 	 */
2613 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2614 		return NULL;
2615 
2616 	/*
2617 	 * Check packet size
2618 	 */
2619 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2620 	minlen = IP_HL(oip) << 2;
2621 	if ((minlen < sizeof(ip_t)) ||
2622 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2623 		return NULL;
2624 	/*
2625 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2626 	 * header claimed in the encapsulated part which is of concern.  It
2627 	 * may be too big to be in this buffer but not so big that it's
2628 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2629 	 * This is possible because we don't know how big oip_hl is when we
2630 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2631 	 * all here now.
2632 	 */
2633 #ifdef  _KERNEL
2634 	{
2635 	mb_t *m;
2636 
2637 	m = fin->fin_m;
2638 # if defined(MENTAT)
2639 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2640 		return NULL;
2641 # else
2642 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2643 	    (char *)fin->fin_ip + M_LEN(m))
2644 		return NULL;
2645 # endif
2646 	}
2647 #endif
2648 
2649 	if (fin->fin_daddr != oip->ip_src.s_addr)
2650 		return NULL;
2651 
2652 	p = oip->ip_p;
2653 	if (p == IPPROTO_TCP)
2654 		flags = IPN_TCP;
2655 	else if (p == IPPROTO_UDP)
2656 		flags = IPN_UDP;
2657 	else if (p == IPPROTO_ICMP) {
2658 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2659 
2660 		/* see if this is related to an ICMP query */
2661 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2662 			data[0] = fin->fin_data[0];
2663 			data[1] = fin->fin_data[1];
2664 			fin->fin_data[0] = 0;
2665 			fin->fin_data[1] = orgicmp->icmp_id;
2666 
2667 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2668 			/*
2669 			 * NOTE : dir refers to the direction of the original
2670 			 *        ip packet. By definition the icmp error
2671 			 *        message flows in the opposite direction.
2672 			 */
2673 			if (dir == NAT_INBOUND)
2674 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2675 						   oip->ip_src);
2676 			else
2677 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2678 						    oip->ip_src);
2679 			fin->fin_data[0] = data[0];
2680 			fin->fin_data[1] = data[1];
2681 			return nat;
2682 		}
2683 	}
2684 
2685 	if (flags & IPN_TCPUDP) {
2686 		minlen += 8;		/* + 64bits of data to get ports */
2687 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2688 			return NULL;
2689 
2690 		data[0] = fin->fin_data[0];
2691 		data[1] = fin->fin_data[1];
2692 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2693 		fin->fin_data[0] = ntohs(tcp->th_dport);
2694 		fin->fin_data[1] = ntohs(tcp->th_sport);
2695 
2696 		if (dir == NAT_INBOUND) {
2697 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2698 					   oip->ip_src);
2699 		} else {
2700 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2701 					    oip->ip_src);
2702 		}
2703 		fin->fin_data[0] = data[0];
2704 		fin->fin_data[1] = data[1];
2705 		return nat;
2706 	}
2707 	if (dir == NAT_INBOUND)
2708 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2709 	else
2710 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2711 }
2712 
2713 
2714 /* ------------------------------------------------------------------------ */
2715 /* Function:    nat_icmperror                                               */
2716 /* Returns:     nat_t* - point to matching NAT structure                    */
2717 /* Parameters:  fin(I)    - pointer to packet information                   */
2718 /*              nflags(I) - NAT flags for this packet                       */
2719 /*              dir(I)    - direction of packet (in/out)                    */
2720 /*                                                                          */
2721 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2722 /* session.  This will correct both packet header data and checksums.       */
2723 /*                                                                          */
2724 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2725 /* a NAT'd ICMP packet gets correctly recognised.                           */
2726 /* ------------------------------------------------------------------------ */
2727 nat_t *nat_icmperror(fin, nflags, dir)
2728 fr_info_t *fin;
2729 u_int *nflags;
2730 int dir;
2731 {
2732 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2733 	struct in_addr in;
2734 	icmphdr_t *icmp, *orgicmp;
2735 	int dlen;
2736 	udphdr_t *udp;
2737 	tcphdr_t *tcp;
2738 	nat_t *nat;
2739 	ip_t *oip;
2740 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2741 		return NULL;
2742 
2743 	/*
2744 	 * nat_icmperrorlookup() looks up nat entry associated with the
2745 	 * offending IP packet and returns pointer to the entry, or NULL
2746 	 * if packet wasn't natted or for `defective' packets.
2747 	 */
2748 
2749 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2750 		return NULL;
2751 
2752 	sumd2 = 0;
2753 	*nflags = IPN_ICMPERR;
2754 	icmp = fin->fin_dp;
2755 	oip = (ip_t *)&icmp->icmp_ip;
2756 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
2757 	tcp = (tcphdr_t *)udp;
2758 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
2759 
2760 	/*
2761 	 * Need to adjust ICMP header to include the real IP#'s and
2762 	 * port #'s.  There are three steps required.
2763 	 *
2764 	 * Step 1
2765 	 * Fix the IP addresses in the offending IP packet and update
2766 	 * ip header checksum to compensate for the change.
2767 	 *
2768 	 * No update needed here for icmp_cksum because the ICMP checksum
2769 	 * is calculated over the complete ICMP packet, which includes the
2770 	 * changed oip IP addresses and oip->ip_sum.  These two changes
2771 	 * cancel each other out (if the delta for the IP address is x,
2772 	 * then the delta for ip_sum is minus x).
2773 	 */
2774 
2775 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
2776 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
2777 		in = nat->nat_inip;
2778 		oip->ip_src = in;
2779 	} else {
2780 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
2781 		in = nat->nat_outip;
2782 		oip->ip_dst = in;
2783 	}
2784 
2785 	sum2 = LONG_SUM(ntohl(in.s_addr));
2786 	CALC_SUMD(sum1, sum2, sumd);
2787 	fix_datacksum(&oip->ip_sum, sumd);
2788 
2789 	/*
2790 	 * Step 2
2791 	 * Perform other adjustments based on protocol of offending packet.
2792 	 */
2793 
2794 	switch (oip->ip_p) {
2795 		case IPPROTO_TCP :
2796 		case IPPROTO_UDP :
2797 
2798 			/*
2799 			* For offending TCP/UDP IP packets, translate the ports
2800 			* based on the NAT specification.
2801 			*
2802 			* Advance notice : Now it becomes complicated :-)
2803 			*
2804 			* Since the port and IP addresse fields are both part
2805 			* of the TCP/UDP checksum of the offending IP packet,
2806 			* we need to adjust that checksum as well.
2807 			*
2808 			* To further complicate things, the TCP/UDP checksum
2809 			* may not be present.  We must check to see if the
2810 			* length of the data portion is big enough to hold
2811 			* the checksum.  In the UDP case, a test to determine
2812 			* if the checksum is even set is also required.
2813 			*
2814 			* Any changes to an IP address, port or checksum within
2815 			* the ICMP packet requires a change to icmp_cksum.
2816 			*
2817 			* Be extremely careful here ... The change is dependent
2818 			* upon whether or not the TCP/UPD checksum is present.
2819 			*
2820 			* If TCP/UPD checksum is present, the icmp_cksum must
2821 			* compensate for checksum modification resulting from
2822 			* IP address change only.  Port change and resulting
2823 			* data checksum adjustments cancel each other out.
2824 			*
2825 			* If TCP/UDP checksum is not present, icmp_cksum must
2826 			* compensate for port change only.  The IP address
2827 			* change does not modify anything else in this case.
2828 			*/
2829 
2830 			psum1 = 0;
2831 			psum2 = 0;
2832 			psumd = 0;
2833 
2834 			if ((tcp->th_dport == nat->nat_oport) &&
2835 			    (tcp->th_sport != nat->nat_inport)) {
2836 
2837 				/*
2838 				 * Translate the source port.
2839 				 */
2840 
2841 				psum1 = ntohs(tcp->th_sport);
2842 				psum2 = ntohs(nat->nat_inport);
2843 				tcp->th_sport = nat->nat_inport;
2844 
2845 			} else if ((tcp->th_sport == nat->nat_oport) &&
2846 				    (tcp->th_dport != nat->nat_outport)) {
2847 
2848 				/*
2849 				 * Translate the destination port.
2850 				 */
2851 
2852 				psum1 = ntohs(tcp->th_dport);
2853 				psum2 = ntohs(nat->nat_outport);
2854 				tcp->th_dport = nat->nat_outport;
2855 			}
2856 
2857 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
2858 
2859 				/*
2860 				 * TCP checksum present.
2861 				 *
2862 				 * Adjust data checksum and icmp checksum to
2863 				 * compensate for any IP address change.
2864 				 */
2865 
2866 				sum1 = ntohs(tcp->th_sum);
2867 				fix_datacksum(&tcp->th_sum, sumd);
2868 				sum2 = ntohs(tcp->th_sum);
2869 				sumd2 = sumd << 1;
2870 				CALC_SUMD(sum1, sum2, sumd);
2871 				sumd2 += sumd;
2872 
2873 				/*
2874 				 * Also make data checksum adjustment to
2875 				 * compensate for any port change.
2876 				 */
2877 
2878 				if (psum1 != psum2) {
2879 					CALC_SUMD(psum1, psum2, psumd);
2880 					fix_datacksum(&tcp->th_sum, psumd);
2881 				}
2882 
2883 			} else if ((oip->ip_p == IPPROTO_UDP) &&
2884 				   (dlen >= 8) && (udp->uh_sum != 0)) {
2885 
2886 				/*
2887 				 * The UDP checksum is present and set.
2888 				 *
2889 				 * Adjust data checksum and icmp checksum to
2890 				 * compensate for any IP address change.
2891 				 */
2892 
2893 				sum1 = ntohs(udp->uh_sum);
2894 				fix_datacksum(&udp->uh_sum, sumd);
2895 				sum2 = ntohs(udp->uh_sum);
2896 				sumd2 = sumd << 1;
2897 				CALC_SUMD(sum1, sum2, sumd);
2898 				sumd2 += sumd;
2899 
2900 				/*
2901 				 * Also make data checksum adjustment to
2902 				 * compensate for any port change.
2903 				 */
2904 
2905 				if (psum1 != psum2) {
2906 					CALC_SUMD(psum1, psum2, psumd);
2907 					fix_datacksum(&udp->uh_sum, psumd);
2908 				}
2909 
2910 			} else {
2911 
2912 				/*
2913 				 * Data checksum was not present.
2914 				 *
2915 				 * Compensate for any port change.
2916 				 */
2917 
2918 				CALC_SUMD(psum2, psum1, psumd);
2919 				sumd2 += psumd;
2920 			}
2921 			break;
2922 
2923 		case IPPROTO_ICMP :
2924 
2925 			orgicmp = (icmphdr_t *)udp;
2926 
2927 			if ((nat->nat_dir == NAT_OUTBOUND) &&
2928 			    (orgicmp->icmp_id != nat->nat_inport) &&
2929 			    (dlen >= 8)) {
2930 
2931 				/*
2932 				 * Fix ICMP checksum (of the offening ICMP
2933 				 * query packet) to compensate the change
2934 				 * in the ICMP id of the offending ICMP
2935 				 * packet.
2936 				 *
2937 				 * Since you modify orgicmp->icmp_id with
2938 				 * a delta (say x) and you compensate that
2939 				 * in origicmp->icmp_cksum with a delta
2940 				 * minus x, you don't have to adjust the
2941 				 * overall icmp->icmp_cksum
2942 				 */
2943 
2944 				sum1 = ntohs(orgicmp->icmp_id);
2945 				sum2 = ntohs(nat->nat_inport);
2946 				CALC_SUMD(sum1, sum2, sumd);
2947 				orgicmp->icmp_id = nat->nat_inport;
2948 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
2949 
2950 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
2951 
2952 			break;
2953 
2954 		default :
2955 
2956 			break;
2957 
2958 	} /* switch (oip->ip_p) */
2959 
2960 	/*
2961 	 * Step 3
2962 	 * Make the adjustments to icmp checksum.
2963 	 */
2964 
2965 	if (sumd2 != 0) {
2966 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2967 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2968 		fix_incksum(&icmp->icmp_cksum, sumd2);
2969 	}
2970 	return nat;
2971 }
2972 
2973 
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
2978 
2979 /* ------------------------------------------------------------------------ */
2980 /* Function:    nat_inlookup                                                */
2981 /* Returns:     nat_t* - NULL == no match,                                  */
2982 /*                       else pointer to matching NAT entry                 */
2983 /* Parameters:  fin(I)    - pointer to packet information                   */
2984 /*              flags(I)  - NAT flags for this packet                       */
2985 /*              p(I)      - protocol for this packet                        */
2986 /*              src(I)    - source IP address                               */
2987 /*              mapdst(I) - destination IP address                          */
2988 /*                                                                          */
2989 /* Lookup a nat entry based on the mapped destination ip address/port and   */
2990 /* real source address/port.  We use this lookup when receiving a packet,   */
2991 /* we're looking for a table entry, based on the destination address.       */
2992 /*                                                                          */
2993 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
2994 /*                                                                          */
2995 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
2996 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
2997 /*                                                                          */
2998 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
2999 /*            the packet is of said protocol                                */
3000 /* ------------------------------------------------------------------------ */
3001 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3002 fr_info_t *fin;
3003 u_int flags, p;
3004 struct in_addr src , mapdst;
3005 {
3006 	u_short sport, dport;
3007 	ipnat_t *ipn;
3008 	u_int sflags;
3009 	nat_t *nat;
3010 	int nflags;
3011 	u_32_t dst;
3012 	void *ifp;
3013 	u_int hv;
3014 	ipf_stack_t *ifs = fin->fin_ifs;
3015 
3016 	if (fin != NULL)
3017 		ifp = fin->fin_ifp;
3018 	else
3019 		ifp = NULL;
3020 	sport = 0;
3021 	dport = 0;
3022 	dst = mapdst.s_addr;
3023 	sflags = flags & NAT_TCPUDPICMP;
3024 
3025 	switch (p)
3026 	{
3027 	case IPPROTO_TCP :
3028 	case IPPROTO_UDP :
3029 		sport = htons(fin->fin_data[0]);
3030 		dport = htons(fin->fin_data[1]);
3031 		break;
3032 	case IPPROTO_ICMP :
3033 		if (flags & IPN_ICMPERR)
3034 			sport = fin->fin_data[1];
3035 		else
3036 			dport = fin->fin_data[1];
3037 		break;
3038 	default :
3039 		break;
3040 	}
3041 
3042 
3043 	if ((flags & SI_WILDP) != 0)
3044 		goto find_in_wild_ports;
3045 
3046 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3047 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3048 	nat = ifs->ifs_nat_table[1][hv];
3049 	for (; nat; nat = nat->nat_hnext[1]) {
3050 		if (nat->nat_ifps[0] != NULL) {
3051 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3052 				continue;
3053 		} else if (ifp != NULL)
3054 			nat->nat_ifps[0] = ifp;
3055 
3056 		nflags = nat->nat_flags;
3057 
3058 		if (nat->nat_oip.s_addr == src.s_addr &&
3059 		    nat->nat_outip.s_addr == dst &&
3060 		    (((p == 0) &&
3061 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3062 		     || (p == nat->nat_p))) {
3063 			switch (p)
3064 			{
3065 #if 0
3066 			case IPPROTO_GRE :
3067 				if (nat->nat_call[1] != fin->fin_data[0])
3068 					continue;
3069 				break;
3070 #endif
3071 			case IPPROTO_ICMP :
3072 				if ((flags & IPN_ICMPERR) != 0) {
3073 					if (nat->nat_outport != sport)
3074 						continue;
3075 				} else {
3076 					if (nat->nat_outport != dport)
3077 						continue;
3078 				}
3079 				break;
3080 			case IPPROTO_TCP :
3081 			case IPPROTO_UDP :
3082 				if (nat->nat_oport != sport)
3083 					continue;
3084 				if (nat->nat_outport != dport)
3085 					continue;
3086 				break;
3087 			default :
3088 				break;
3089 			}
3090 
3091 			ipn = nat->nat_ptr;
3092 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3093 				if (appr_match(fin, nat) != 0)
3094 					continue;
3095 			return nat;
3096 		}
3097 	}
3098 
3099 	/*
3100 	 * So if we didn't find it but there are wildcard members in the hash
3101 	 * table, go back and look for them.  We do this search and update here
3102 	 * because it is modifying the NAT table and we want to do this only
3103 	 * for the first packet that matches.  The exception, of course, is
3104 	 * for "dummy" (FI_IGNORE) lookups.
3105 	 */
3106 find_in_wild_ports:
3107 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3108 		return NULL;
3109 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3110 		return NULL;
3111 
3112 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3113 
3114 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3115 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3116 
3117 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3118 
3119 	nat = ifs->ifs_nat_table[1][hv];
3120 	for (; nat; nat = nat->nat_hnext[1]) {
3121 		if (nat->nat_ifps[0] != NULL) {
3122 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3123 				continue;
3124 		} else if (ifp != NULL)
3125 			nat->nat_ifps[0] = ifp;
3126 
3127 		if (nat->nat_p != fin->fin_p)
3128 			continue;
3129 		if (nat->nat_oip.s_addr != src.s_addr ||
3130 		    nat->nat_outip.s_addr != dst)
3131 			continue;
3132 
3133 		nflags = nat->nat_flags;
3134 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3135 			continue;
3136 
3137 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3138 			       NAT_INBOUND) == 1) {
3139 			if ((fin->fin_flx & FI_IGNORE) != 0)
3140 				break;
3141 			if ((nflags & SI_CLONE) != 0) {
3142 				nat = fr_natclone(fin, nat);
3143 				if (nat == NULL)
3144 					break;
3145 			} else {
3146 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3147 				ifs->ifs_nat_stats.ns_wilds--;
3148 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3149 			}
3150 			nat->nat_oport = sport;
3151 			nat->nat_outport = dport;
3152 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3153 			nat_tabmove(nat, ifs);
3154 			break;
3155 		}
3156 	}
3157 
3158 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3159 
3160 	return nat;
3161 }
3162 
3163 
3164 /* ------------------------------------------------------------------------ */
3165 /* Function:    nat_tabmove                                                 */
3166 /* Returns:     Nil                                                         */
3167 /* Parameters:  nat(I) - pointer to NAT structure                           */
3168 /* Write Lock:  ipf_nat                                                     */
3169 /*                                                                          */
3170 /* This function is only called for TCP/UDP NAT table entries where the     */
3171 /* original was placed in the table without hashing on the ports and we now */
3172 /* want to include hashing on port numbers.                                 */
3173 /* ------------------------------------------------------------------------ */
3174 static void nat_tabmove(nat, ifs)
3175 nat_t *nat;
3176 ipf_stack_t *ifs;
3177 {
3178 	nat_t **natp;
3179 	u_int hv;
3180 
3181 	if (nat->nat_flags & SI_CLONE)
3182 		return;
3183 
3184 	/*
3185 	 * Remove the NAT entry from the old location
3186 	 */
3187 	if (nat->nat_hnext[0])
3188 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3189 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3190 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3191 
3192 	if (nat->nat_hnext[1])
3193 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3194 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3195 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3196 
3197 	/*
3198 	 * Add into the NAT table in the new position
3199 	 */
3200 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3201 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3202 			 ifs->ifs_ipf_nattable_sz);
3203 	nat->nat_hv[0] = hv;
3204 	natp = &ifs->ifs_nat_table[0][hv];
3205 	if (*natp)
3206 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3207 	nat->nat_phnext[0] = natp;
3208 	nat->nat_hnext[0] = *natp;
3209 	*natp = nat;
3210 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3211 
3212 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3213 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3214 			 ifs->ifs_ipf_nattable_sz);
3215 	nat->nat_hv[1] = hv;
3216 	natp = &ifs->ifs_nat_table[1][hv];
3217 	if (*natp)
3218 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3219 	nat->nat_phnext[1] = natp;
3220 	nat->nat_hnext[1] = *natp;
3221 	*natp = nat;
3222 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3223 }
3224 
3225 
3226 /* ------------------------------------------------------------------------ */
3227 /* Function:    nat_outlookup                                               */
3228 /* Returns:     nat_t* - NULL == no match,                                  */
3229 /*                       else pointer to matching NAT entry                 */
3230 /* Parameters:  fin(I)   - pointer to packet information                    */
3231 /*              flags(I) - NAT flags for this packet                        */
3232 /*              p(I)     - protocol for this packet                         */
3233 /*              src(I)   - source IP address                                */
3234 /*              dst(I)   - destination IP address                           */
3235 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3236 /*                                                                          */
3237 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3238 /* destination address/port.  We use this lookup when sending a packet out, */
3239 /* we're looking for a table entry, based on the source address.            */
3240 /*                                                                          */
3241 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3242 /*                                                                          */
3243 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3244 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3245 /*                                                                          */
3246 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3247 /*            the packet is of said protocol                                */
3248 /* ------------------------------------------------------------------------ */
3249 nat_t *nat_outlookup(fin, flags, p, src, dst)
3250 fr_info_t *fin;
3251 u_int flags, p;
3252 struct in_addr src , dst;
3253 {
3254 	u_short sport, dport;
3255 	u_int sflags;
3256 	ipnat_t *ipn;
3257 	u_32_t srcip;
3258 	nat_t *nat;
3259 	int nflags;
3260 	void *ifp;
3261 	u_int hv;
3262 	ipf_stack_t *ifs = fin->fin_ifs;
3263 
3264 	ifp = fin->fin_ifp;
3265 
3266 	srcip = src.s_addr;
3267 	sflags = flags & IPN_TCPUDPICMP;
3268 	sport = 0;
3269 	dport = 0;
3270 
3271 	switch (p)
3272 	{
3273 	case IPPROTO_TCP :
3274 	case IPPROTO_UDP :
3275 		sport = htons(fin->fin_data[0]);
3276 		dport = htons(fin->fin_data[1]);
3277 		break;
3278 	case IPPROTO_ICMP :
3279 		if (flags & IPN_ICMPERR)
3280 			sport = fin->fin_data[1];
3281 		else
3282 			dport = fin->fin_data[1];
3283 		break;
3284 	default :
3285 		break;
3286 	}
3287 
3288 	if ((flags & SI_WILDP) != 0)
3289 		goto find_out_wild_ports;
3290 
3291 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3292 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3293 	nat = ifs->ifs_nat_table[0][hv];
3294 	for (; nat; nat = nat->nat_hnext[0]) {
3295 		if (nat->nat_ifps[1] != NULL) {
3296 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3297 				continue;
3298 		} else if (ifp != NULL)
3299 			nat->nat_ifps[1] = ifp;
3300 
3301 		nflags = nat->nat_flags;
3302 
3303 		if (nat->nat_inip.s_addr == srcip &&
3304 		    nat->nat_oip.s_addr == dst.s_addr &&
3305 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3306 		     || (p == nat->nat_p))) {
3307 			switch (p)
3308 			{
3309 #if 0
3310 			case IPPROTO_GRE :
3311 				if (nat->nat_call[1] != fin->fin_data[0])
3312 					continue;
3313 				break;
3314 #endif
3315 			case IPPROTO_TCP :
3316 			case IPPROTO_UDP :
3317 				if (nat->nat_oport != dport)
3318 					continue;
3319 				if (nat->nat_inport != sport)
3320 					continue;
3321 				break;
3322 			default :
3323 				break;
3324 			}
3325 
3326 			ipn = nat->nat_ptr;
3327 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3328 				if (appr_match(fin, nat) != 0)
3329 					continue;
3330 			return nat;
3331 		}
3332 	}
3333 
3334 	/*
3335 	 * So if we didn't find it but there are wildcard members in the hash
3336 	 * table, go back and look for them.  We do this search and update here
3337 	 * because it is modifying the NAT table and we want to do this only
3338 	 * for the first packet that matches.  The exception, of course, is
3339 	 * for "dummy" (FI_IGNORE) lookups.
3340 	 */
3341 find_out_wild_ports:
3342 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3343 		return NULL;
3344 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3345 		return NULL;
3346 
3347 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3348 
3349 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3350 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3351 
3352 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3353 
3354 	nat = ifs->ifs_nat_table[0][hv];
3355 	for (; nat; nat = nat->nat_hnext[0]) {
3356 		if (nat->nat_ifps[1] != NULL) {
3357 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3358 				continue;
3359 		} else if (ifp != NULL)
3360 			nat->nat_ifps[1] = ifp;
3361 
3362 		if (nat->nat_p != fin->fin_p)
3363 			continue;
3364 		if ((nat->nat_inip.s_addr != srcip) ||
3365 		    (nat->nat_oip.s_addr != dst.s_addr))
3366 			continue;
3367 
3368 		nflags = nat->nat_flags;
3369 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3370 			continue;
3371 
3372 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3373 			       NAT_OUTBOUND) == 1) {
3374 			if ((fin->fin_flx & FI_IGNORE) != 0)
3375 				break;
3376 			if ((nflags & SI_CLONE) != 0) {
3377 				nat = fr_natclone(fin, nat);
3378 				if (nat == NULL)
3379 					break;
3380 			} else {
3381 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3382 				ifs->ifs_nat_stats.ns_wilds--;
3383 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3384 			}
3385 			nat->nat_inport = sport;
3386 			nat->nat_oport = dport;
3387 			if (nat->nat_outport == 0)
3388 				nat->nat_outport = sport;
3389 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3390 			nat_tabmove(nat, ifs);
3391 			break;
3392 		}
3393 	}
3394 
3395 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3396 
3397 	return nat;
3398 }
3399 
3400 
3401 /* ------------------------------------------------------------------------ */
3402 /* Function:    nat_lookupredir                                             */
3403 /* Returns:     nat_t* - NULL == no match,                                  */
3404 /*                       else pointer to matching NAT entry                 */
3405 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3406 /*                      entry for.                                          */
3407 /*                                                                          */
3408 /* Lookup the NAT tables to search for a matching redirect                  */
3409 /* ------------------------------------------------------------------------ */
3410 nat_t *nat_lookupredir(np, ifs)
3411 natlookup_t *np;
3412 ipf_stack_t *ifs;
3413 {
3414 	fr_info_t fi;
3415 	nat_t *nat;
3416 
3417 	bzero((char *)&fi, sizeof(fi));
3418 	if (np->nl_flags & IPN_IN) {
3419 		fi.fin_data[0] = ntohs(np->nl_realport);
3420 		fi.fin_data[1] = ntohs(np->nl_outport);
3421 	} else {
3422 		fi.fin_data[0] = ntohs(np->nl_inport);
3423 		fi.fin_data[1] = ntohs(np->nl_outport);
3424 	}
3425 	if (np->nl_flags & IPN_TCP)
3426 		fi.fin_p = IPPROTO_TCP;
3427 	else if (np->nl_flags & IPN_UDP)
3428 		fi.fin_p = IPPROTO_UDP;
3429 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3430 		fi.fin_p = IPPROTO_ICMP;
3431 
3432 	fi.fin_ifs = ifs;
3433 	/*
3434 	 * We can do two sorts of lookups:
3435 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3436 	 * - default: we have the `in' and `out' address, look for `real'.
3437 	 */
3438 	if (np->nl_flags & IPN_IN) {
3439 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3440 					np->nl_realip, np->nl_outip))) {
3441 			np->nl_inip = nat->nat_inip;
3442 			np->nl_inport = nat->nat_inport;
3443 		}
3444 	} else {
3445 		/*
3446 		 * If nl_inip is non null, this is a lookup based on the real
3447 		 * ip address. Else, we use the fake.
3448 		 */
3449 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3450 					 np->nl_inip, np->nl_outip))) {
3451 
3452 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3453 				fr_info_t fin;
3454 				bzero((char *)&fin, sizeof(fin));
3455 				fin.fin_p = nat->nat_p;
3456 				fin.fin_data[0] = ntohs(nat->nat_outport);
3457 				fin.fin_data[1] = ntohs(nat->nat_oport);
3458 				fin.fin_ifs = ifs;
3459 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3460 						 nat->nat_outip,
3461 						 nat->nat_oip) != NULL) {
3462 					np->nl_flags &= ~IPN_FINDFORWARD;
3463 				}
3464 			}
3465 
3466 			np->nl_realip = nat->nat_outip;
3467 			np->nl_realport = nat->nat_outport;
3468 		}
3469  	}
3470 
3471 	return nat;
3472 }
3473 
3474 
3475 /* ------------------------------------------------------------------------ */
3476 /* Function:    nat_match                                                   */
3477 /* Returns:     int - 0 == no match, 1 == match                             */
3478 /* Parameters:  fin(I)   - pointer to packet information                    */
3479 /*              np(I)    - pointer to NAT rule                              */
3480 /*                                                                          */
3481 /* Pull the matching of a packet against a NAT rule out of that complex     */
3482 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3483 /* ------------------------------------------------------------------------ */
3484 static int nat_match(fin, np)
3485 fr_info_t *fin;
3486 ipnat_t *np;
3487 {
3488 	frtuc_t *ft;
3489 
3490 	if (fin->fin_v != 4)
3491 		return 0;
3492 
3493 	if (np->in_p && fin->fin_p != np->in_p)
3494 		return 0;
3495 
3496 	if (fin->fin_out) {
3497 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3498 			return 0;
3499 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3500 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3501 			return 0;
3502 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3503 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3504 			return 0;
3505 	} else {
3506 		if (!(np->in_redir & NAT_REDIRECT))
3507 			return 0;
3508 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3509 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3510 			return 0;
3511 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3512 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3513 			return 0;
3514 	}
3515 
3516 	ft = &np->in_tuc;
3517 	if (!(fin->fin_flx & FI_TCPUDP) ||
3518 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3519 		if (ft->ftu_scmp || ft->ftu_dcmp)
3520 			return 0;
3521 		return 1;
3522 	}
3523 
3524 	return fr_tcpudpchk(fin, ft);
3525 }
3526 
3527 
3528 /* ------------------------------------------------------------------------ */
3529 /* Function:    nat_update                                                  */
3530 /* Returns:     Nil                                                         */
3531 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3532 /*              np(I)     - pointer to NAT rule                             */
3533 /*                                                                          */
3534 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3535 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3536 /* ------------------------------------------------------------------------ */
3537 void nat_update(fin, nat, np)
3538 fr_info_t *fin;
3539 nat_t *nat;
3540 ipnat_t *np;
3541 {
3542 	ipftq_t *ifq, *ifq2;
3543 	ipftqent_t *tqe;
3544 	ipf_stack_t *ifs = fin->fin_ifs;
3545 
3546 	MUTEX_ENTER(&nat->nat_lock);
3547 	tqe = &nat->nat_tqe;
3548 	ifq = tqe->tqe_ifq;
3549 
3550 	/*
3551 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3552 	 * TCP, however, if it is TCP and there is no rule timeout set,
3553 	 * then do not update the timeout here.
3554 	 */
3555 	if (np != NULL)
3556 		ifq2 = np->in_tqehead[fin->fin_rev];
3557 	else
3558 		ifq2 = NULL;
3559 
3560 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3561 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3562 	} else {
3563 		if (ifq2 == NULL) {
3564 			if (nat->nat_p == IPPROTO_UDP)
3565 				ifq2 = &ifs->ifs_nat_udptq;
3566 			else if (nat->nat_p == IPPROTO_ICMP)
3567 				ifq2 = &ifs->ifs_nat_icmptq;
3568 			else
3569 				ifq2 = &ifs->ifs_nat_iptq;
3570 		}
3571 
3572 		fr_movequeue(tqe, ifq, ifq2, ifs);
3573 	}
3574 	MUTEX_EXIT(&nat->nat_lock);
3575 }
3576 
3577 
3578 /* ------------------------------------------------------------------------ */
3579 /* Function:    fr_checknatout                                              */
3580 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3581 /*                     0 == no packet translation occurred,                 */
3582 /*                     1 == packet was successfully translated.             */
3583 /* Parameters:  fin(I)   - pointer to packet information                    */
3584 /*              passp(I) - pointer to filtering result flags                */
3585 /*                                                                          */
3586 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3587 /* first checked to see if they match an existing entry (if an error),      */
3588 /* otherwise a search of the current NAT table is made.  If neither results */
3589 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3590 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3591 /* packet header(s) as required.                                            */
3592 /* ------------------------------------------------------------------------ */
3593 int fr_checknatout(fin, passp)
3594 fr_info_t *fin;
3595 u_32_t *passp;
3596 {
3597 	struct ifnet *ifp, *sifp;
3598 	icmphdr_t *icmp = NULL;
3599 	tcphdr_t *tcp = NULL;
3600 	int rval, natfailed;
3601 	ipnat_t *np = NULL;
3602 	u_int nflags = 0;
3603 	u_32_t ipa, iph;
3604 	int natadd = 1;
3605 	frentry_t *fr;
3606 	nat_t *nat;
3607 	ipf_stack_t *ifs = fin->fin_ifs;
3608 
3609 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3610 		return 0;
3611 
3612 	natfailed = 0;
3613 	fr = fin->fin_fr;
3614 	sifp = fin->fin_ifp;
3615 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3616 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3617 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3618 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3619 	ifp = fin->fin_ifp;
3620 
3621 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3622 		switch (fin->fin_p)
3623 		{
3624 		case IPPROTO_TCP :
3625 			nflags = IPN_TCP;
3626 			break;
3627 		case IPPROTO_UDP :
3628 			nflags = IPN_UDP;
3629 			break;
3630 		case IPPROTO_ICMP :
3631 			icmp = fin->fin_dp;
3632 
3633 			/*
3634 			 * This is an incoming packet, so the destination is
3635 			 * the icmp_id and the source port equals 0
3636 			 */
3637 			if (nat_icmpquerytype4(icmp->icmp_type))
3638 				nflags = IPN_ICMPQUERY;
3639 			break;
3640 		default :
3641 			break;
3642 		}
3643 
3644 		if ((nflags & IPN_TCPUDP))
3645 			tcp = fin->fin_dp;
3646 	}
3647 
3648 	ipa = fin->fin_saddr;
3649 
3650 	READ_ENTER(&ifs->ifs_ipf_nat);
3651 
3652 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3653 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3654 		/*EMPTY*/;
3655 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3656 		natadd = 0;
3657 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3658 				      fin->fin_src, fin->fin_dst))) {
3659 		nflags = nat->nat_flags;
3660 	} else {
3661 		u_32_t hv, msk, nmsk;
3662 
3663 		/*
3664 		 * If there is no current entry in the nat table for this IP#,
3665 		 * create one for it (if there is a matching rule).
3666 		 */
3667 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3668 		msk = 0xffffffff;
3669 		nmsk = ifs->ifs_nat_masks;
3670 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3671 maskloop:
3672 		iph = ipa & htonl(msk);
3673 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3674 		for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext)
3675 		{
3676 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3677 				continue;
3678 			if (np->in_v != fin->fin_v)
3679 				continue;
3680 			if (np->in_p && (np->in_p != fin->fin_p))
3681 				continue;
3682 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3683 				continue;
3684 			if (np->in_flags & IPN_FILTER) {
3685 				if (!nat_match(fin, np))
3686 					continue;
3687 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3688 				continue;
3689 
3690 			if ((fr != NULL) &&
3691 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3692 				continue;
3693 
3694 			if (*np->in_plabel != '\0') {
3695 				if (((np->in_flags & IPN_FILTER) == 0) &&
3696 				    (np->in_dport != tcp->th_dport))
3697 					continue;
3698 				if (appr_ok(fin, tcp, np) == 0)
3699 					continue;
3700 			}
3701 
3702 			if ((nat = nat_new(fin, np, NULL, nflags,
3703 					   NAT_OUTBOUND))) {
3704 				np->in_hits++;
3705 				break;
3706 			} else
3707 				natfailed = -1;
3708 		}
3709 		if ((np == NULL) && (nmsk != 0)) {
3710 			while (nmsk) {
3711 				msk <<= 1;
3712 				if (nmsk & 0x80000000)
3713 					break;
3714 				nmsk <<= 1;
3715 			}
3716 			if (nmsk != 0) {
3717 				nmsk <<= 1;
3718 				goto maskloop;
3719 			}
3720 		}
3721 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3722 	}
3723 
3724 	if (nat != NULL) {
3725 		rval = fr_natout(fin, nat, natadd, nflags);
3726 		if (rval == 1) {
3727 			MUTEX_ENTER(&nat->nat_lock);
3728 			nat->nat_ref++;
3729 			MUTEX_EXIT(&nat->nat_lock);
3730 			nat->nat_touched = ifs->ifs_fr_ticks;
3731 			fin->fin_nat = nat;
3732 		}
3733 	} else
3734 		rval = natfailed;
3735 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3736 
3737 	if (rval == -1) {
3738 		if (passp != NULL)
3739 			*passp = FR_BLOCK;
3740 		fin->fin_flx |= FI_BADNAT;
3741 	}
3742 	fin->fin_ifp = sifp;
3743 	return rval;
3744 }
3745 
3746 /* ------------------------------------------------------------------------ */
3747 /* Function:    fr_natout                                                   */
3748 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3749 /*                     1 == packet was successfully translated.             */
3750 /* Parameters:  fin(I)    - pointer to packet information                   */
3751 /*              nat(I)    - pointer to NAT structure                        */
3752 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3753 /*              nflags(I) - NAT flags set for this packet                   */
3754 /*                                                                          */
3755 /* Translate a packet coming "out" on an interface.                         */
3756 /* ------------------------------------------------------------------------ */
3757 int fr_natout(fin, nat, natadd, nflags)
3758 fr_info_t *fin;
3759 nat_t *nat;
3760 int natadd;
3761 u_32_t nflags;
3762 {
3763 	icmphdr_t *icmp;
3764 	u_short *csump;
3765 	u_32_t sumd;
3766 	tcphdr_t *tcp;
3767 	ipnat_t *np;
3768 	int i;
3769 	ipf_stack_t *ifs = fin->fin_ifs;
3770 
3771 #if SOLARIS && defined(_KERNEL)
3772 	net_data_t net_data_p;
3773 	if (fin->fin_v == 4)
3774 		net_data_p = ifs->ifs_ipf_ipv4;
3775 	else
3776 		net_data_p = ifs->ifs_ipf_ipv6;
3777 #endif
3778 
3779 	tcp = NULL;
3780 	icmp = NULL;
3781 	csump = NULL;
3782 	np = nat->nat_ptr;
3783 
3784 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
3785 		(void) fr_nat_newfrag(fin, 0, nat);
3786 
3787 	MUTEX_ENTER(&nat->nat_lock);
3788 	nat->nat_bytes[1] += fin->fin_plen;
3789 	nat->nat_pkts[1]++;
3790 	MUTEX_EXIT(&nat->nat_lock);
3791 
3792 	/*
3793 	 * Fix up checksums, not by recalculating them, but
3794 	 * simply computing adjustments.
3795 	 * This is only done for STREAMS based IP implementations where the
3796 	 * checksum has already been calculated by IP.  In all other cases,
3797 	 * IPFilter is called before the checksum needs calculating so there
3798 	 * is no call to modify whatever is in the header now.
3799 	 */
3800 	ASSERT(fin->fin_m != NULL);
3801 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
3802 		if (nflags == IPN_ICMPERR) {
3803 			u_32_t s1, s2;
3804 
3805 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3806 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3807 			CALC_SUMD(s1, s2, sumd);
3808 
3809 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
3810 		}
3811 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3812     defined(linux) || defined(BRIDGE_IPF)
3813 		else {
3814 			/*
3815 			 * Strictly speaking, this isn't necessary on BSD
3816 			 * kernels because they do checksum calculation after
3817 			 * this code has run BUT if ipfilter is being used
3818 			 * to do NAT as a bridge, that code doesn't exist.
3819 			 */
3820 			if (nat->nat_dir == NAT_OUTBOUND)
3821 				fix_outcksum(&fin->fin_ip->ip_sum,
3822 					    nat->nat_ipsumd);
3823 			else
3824 				fix_incksum(&fin->fin_ip->ip_sum,
3825 				 	   nat->nat_ipsumd);
3826 		}
3827 #endif
3828 	}
3829 
3830 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3831 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3832 			tcp = fin->fin_dp;
3833 
3834 			tcp->th_sport = nat->nat_outport;
3835 			fin->fin_data[0] = ntohs(nat->nat_outport);
3836 		}
3837 
3838 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3839 			icmp = fin->fin_dp;
3840 			icmp->icmp_id = nat->nat_outport;
3841 		}
3842 
3843 		csump = nat_proto(fin, nat, nflags);
3844 	}
3845 
3846 	fin->fin_ip->ip_src = nat->nat_outip;
3847 
3848 	nat_update(fin, nat, np);
3849 
3850 	/*
3851 	 * The above comments do not hold for layer 4 (or higher) checksums...
3852 	 */
3853 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
3854 		if (nflags & IPN_TCPUDP &&
3855 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
3856 			sumd = nat->nat_sumd[1];
3857 		else
3858 			sumd = nat->nat_sumd[0];
3859 
3860 		if (nat->nat_dir == NAT_OUTBOUND)
3861 			fix_outcksum(csump, sumd);
3862 		else
3863 			fix_incksum(csump, sumd);
3864 	}
3865 #ifdef	IPFILTER_SYNC
3866 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3867 #endif
3868 	/* ------------------------------------------------------------- */
3869 	/* A few quick notes:						 */
3870 	/*	Following are test conditions prior to calling the 	 */
3871 	/*	appr_check routine.					 */
3872 	/*								 */
3873 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3874 	/*	with a redirect rule, we attempt to match the packet's	 */
3875 	/*	source port against in_dport, otherwise	we'd compare the */
3876 	/*	packet's destination.			 		 */
3877 	/* ------------------------------------------------------------- */
3878 	if ((np != NULL) && (np->in_apr != NULL)) {
3879 		i = appr_check(fin, nat);
3880 		if (i == 0)
3881 			i = 1;
3882 	} else
3883 		i = 1;
3884 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]);
3885 	fin->fin_flx |= FI_NATED;
3886 	return i;
3887 }
3888 
3889 
3890 /* ------------------------------------------------------------------------ */
3891 /* Function:    fr_checknatin                                               */
3892 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3893 /*                     0 == no packet translation occurred,                 */
3894 /*                     1 == packet was successfully translated.             */
3895 /* Parameters:  fin(I)   - pointer to packet information                    */
3896 /*              passp(I) - pointer to filtering result flags                */
3897 /*                                                                          */
3898 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
3899 /* first checked to see if they match an existing entry (if an error),      */
3900 /* otherwise a search of the current NAT table is made.  If neither results */
3901 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3902 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3903 /* packet header(s) as required.                                            */
3904 /* ------------------------------------------------------------------------ */
3905 int fr_checknatin(fin, passp)
3906 fr_info_t *fin;
3907 u_32_t *passp;
3908 {
3909 	u_int nflags, natadd;
3910 	int rval, natfailed;
3911 	struct ifnet *ifp;
3912 	struct in_addr in;
3913 	icmphdr_t *icmp;
3914 	tcphdr_t *tcp;
3915 	u_short dport;
3916 	ipnat_t *np;
3917 	nat_t *nat;
3918 	u_32_t iph;
3919 	ipf_stack_t *ifs = fin->fin_ifs;
3920 
3921 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3922 		return 0;
3923 
3924 	tcp = NULL;
3925 	icmp = NULL;
3926 	dport = 0;
3927 	natadd = 1;
3928 	nflags = 0;
3929 	natfailed = 0;
3930 	ifp = fin->fin_ifp;
3931 
3932 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3933 		switch (fin->fin_p)
3934 		{
3935 		case IPPROTO_TCP :
3936 			nflags = IPN_TCP;
3937 			break;
3938 		case IPPROTO_UDP :
3939 			nflags = IPN_UDP;
3940 			break;
3941 		case IPPROTO_ICMP :
3942 			icmp = fin->fin_dp;
3943 
3944 			/*
3945 			 * This is an incoming packet, so the destination is
3946 			 * the icmp_id and the source port equals 0
3947 			 */
3948 			if (nat_icmpquerytype4(icmp->icmp_type)) {
3949 				nflags = IPN_ICMPQUERY;
3950 				dport = icmp->icmp_id;
3951 			} break;
3952 		default :
3953 			break;
3954 		}
3955 
3956 		if ((nflags & IPN_TCPUDP)) {
3957 			tcp = fin->fin_dp;
3958 			dport = tcp->th_dport;
3959 		}
3960 	}
3961 
3962 	in = fin->fin_dst;
3963 
3964 	READ_ENTER(&ifs->ifs_ipf_nat);
3965 
3966 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3967 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
3968 		/*EMPTY*/;
3969 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3970 		natadd = 0;
3971 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3972 				     fin->fin_src, in))) {
3973 		nflags = nat->nat_flags;
3974 	} else {
3975 		u_32_t hv, msk, rmsk;
3976 
3977 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3978 		rmsk = ifs->ifs_rdr_masks;
3979 		msk = 0xffffffff;
3980 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3981 		/*
3982 		 * If there is no current entry in the nat table for this IP#,
3983 		 * create one for it (if there is a matching rule).
3984 		 */
3985 maskloop:
3986 		iph = in.s_addr & htonl(msk);
3987 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
3988 		for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) {
3989 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
3990 				continue;
3991 			if (np->in_v != fin->fin_v)
3992 				continue;
3993 			if (np->in_p && (np->in_p != fin->fin_p))
3994 				continue;
3995 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3996 				continue;
3997 			if (np->in_flags & IPN_FILTER) {
3998 				if (!nat_match(fin, np))
3999 					continue;
4000 			} else {
4001 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4002 					continue;
4003 				if (np->in_pmin &&
4004 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4005 				     (ntohs(dport) < ntohs(np->in_pmin))))
4006 					continue;
4007 			}
4008 
4009 			if (*np->in_plabel != '\0') {
4010 				if (!appr_ok(fin, tcp, np)) {
4011 					continue;
4012 				}
4013 			}
4014 
4015 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4016 			if (nat != NULL) {
4017 				np->in_hits++;
4018 				break;
4019 			} else
4020 				natfailed = -1;
4021 		}
4022 
4023 		if ((np == NULL) && (rmsk != 0)) {
4024 			while (rmsk) {
4025 				msk <<= 1;
4026 				if (rmsk & 0x80000000)
4027 					break;
4028 				rmsk <<= 1;
4029 			}
4030 			if (rmsk != 0) {
4031 				rmsk <<= 1;
4032 				goto maskloop;
4033 			}
4034 		}
4035 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4036 	}
4037 	if (nat != NULL) {
4038 		rval = fr_natin(fin, nat, natadd, nflags);
4039 		if (rval == 1) {
4040 			MUTEX_ENTER(&nat->nat_lock);
4041 			nat->nat_ref++;
4042 			MUTEX_EXIT(&nat->nat_lock);
4043 			nat->nat_touched = ifs->ifs_fr_ticks;
4044 			fin->fin_nat = nat;
4045 			fin->fin_state = nat->nat_state;
4046 		}
4047 	} else
4048 		rval = natfailed;
4049 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4050 
4051 	if (rval == -1) {
4052 		if (passp != NULL)
4053 			*passp = FR_BLOCK;
4054 		fin->fin_flx |= FI_BADNAT;
4055 	}
4056 	return rval;
4057 }
4058 
4059 
4060 /* ------------------------------------------------------------------------ */
4061 /* Function:    fr_natin                                                    */
4062 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4063 /*                     1 == packet was successfully translated.             */
4064 /* Parameters:  fin(I)    - pointer to packet information                   */
4065 /*              nat(I)    - pointer to NAT structure                        */
4066 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4067 /*              nflags(I) - NAT flags set for this packet                   */
4068 /* Locks Held:  ipf_nat (READ)                                              */
4069 /*                                                                          */
4070 /* Translate a packet coming "in" on an interface.                          */
4071 /* ------------------------------------------------------------------------ */
4072 int fr_natin(fin, nat, natadd, nflags)
4073 fr_info_t *fin;
4074 nat_t *nat;
4075 int natadd;
4076 u_32_t nflags;
4077 {
4078 	icmphdr_t *icmp;
4079 	u_short *csump;
4080 	tcphdr_t *tcp;
4081 	ipnat_t *np;
4082 	int i;
4083 	ipf_stack_t *ifs = fin->fin_ifs;
4084 
4085 #if SOLARIS && defined(_KERNEL)
4086 	net_data_t net_data_p;
4087 	if (fin->fin_v == 4)
4088 		net_data_p = ifs->ifs_ipf_ipv4;
4089 	else
4090 		net_data_p = ifs->ifs_ipf_ipv6;
4091 #endif
4092 
4093 	tcp = NULL;
4094 	csump = NULL;
4095 	np = nat->nat_ptr;
4096 	fin->fin_fr = nat->nat_fr;
4097 
4098 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4099 		(void) fr_nat_newfrag(fin, 0, nat);
4100 
4101 	if (np != NULL) {
4102 
4103 	/* ------------------------------------------------------------- */
4104 	/* A few quick notes:						 */
4105 	/*	Following are test conditions prior to calling the 	 */
4106 	/*	appr_check routine.					 */
4107 	/*								 */
4108 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4109 	/*	with a map rule, we attempt to match the packet's	 */
4110 	/*	source port against in_dport, otherwise	we'd compare the */
4111 	/*	packet's destination.			 		 */
4112 	/* ------------------------------------------------------------- */
4113 		if (np->in_apr != NULL) {
4114 			i = appr_check(fin, nat);
4115 			if (i == -1) {
4116 				return -1;
4117 			}
4118 		}
4119 	}
4120 
4121 #ifdef	IPFILTER_SYNC
4122 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4123 #endif
4124 
4125 	MUTEX_ENTER(&nat->nat_lock);
4126 	nat->nat_bytes[0] += fin->fin_plen;
4127 	nat->nat_pkts[0]++;
4128 	MUTEX_EXIT(&nat->nat_lock);
4129 
4130 	fin->fin_ip->ip_dst = nat->nat_inip;
4131 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4132 	if (nflags & IPN_TCPUDP)
4133 		tcp = fin->fin_dp;
4134 
4135 	/*
4136 	 * Fix up checksums, not by recalculating them, but
4137 	 * simply computing adjustments.
4138 	 * Why only do this for some platforms on inbound packets ?
4139 	 * Because for those that it is done, IP processing is yet to happen
4140 	 * and so the IPv4 header checksum has not yet been evaluated.
4141 	 * Perhaps it should always be done for the benefit of things like
4142 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4143 	 * header checksum offloading, perhaps it is a moot point.
4144 	 */
4145 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4146      defined(__osf__) || defined(linux)
4147 	if (nat->nat_dir == NAT_OUTBOUND)
4148 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4149 	else
4150 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4151 #endif
4152 
4153 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4154 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4155 			tcp->th_dport = nat->nat_inport;
4156 			fin->fin_data[1] = ntohs(nat->nat_inport);
4157 		}
4158 
4159 
4160 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4161 			icmp = fin->fin_dp;
4162 
4163 			icmp->icmp_id = nat->nat_inport;
4164 		}
4165 
4166 		csump = nat_proto(fin, nat, nflags);
4167 	}
4168 
4169 	nat_update(fin, nat, np);
4170 
4171 	/*
4172 	 * In case they are being forwarded, inbound packets always need to have
4173 	 * their checksum adjusted even if hardware checksum validation said OK.
4174 	 */
4175 	if (csump != NULL) {
4176 		if (nat->nat_dir == NAT_OUTBOUND)
4177 			fix_incksum(csump, nat->nat_sumd[0]);
4178 		else
4179 			fix_outcksum(csump, nat->nat_sumd[0]);
4180 	}
4181 
4182 #if SOLARIS && defined(_KERNEL)
4183 	if (nflags & IPN_TCPUDP &&
4184 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4185 		/*
4186 		 * Need to adjust the partial checksum result stored in
4187 		 * db_cksum16, which will be used for validation in IP.
4188 		 * See IP_CKSUM_RECV().
4189 		 * Adjustment data should be the inverse of the IP address
4190 		 * changes, because db_cksum16 is supposed to be the complement
4191 		 * of the pesudo header.
4192 		 */
4193 		csump = &fin->fin_m->b_datap->db_cksum16;
4194 		if (nat->nat_dir == NAT_OUTBOUND)
4195 			fix_outcksum(csump, nat->nat_sumd[1]);
4196 		else
4197 			fix_incksum(csump, nat->nat_sumd[1]);
4198 	}
4199 #endif
4200 
4201 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]);
4202 	fin->fin_flx |= FI_NATED;
4203 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4204 		fin->fin_nattag = &np->in_tag;
4205 	return 1;
4206 }
4207 
4208 
4209 /* ------------------------------------------------------------------------ */
4210 /* Function:    nat_proto                                                   */
4211 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4212 /*                         NULL if the transport protocol is not recognised */
4213 /*                         as needing a checksum update.                    */
4214 /* Parameters:  fin(I)    - pointer to packet information                   */
4215 /*              nat(I)    - pointer to NAT structure                        */
4216 /*              nflags(I) - NAT flags set for this packet                   */
4217 /*                                                                          */
4218 /* Return the pointer to the checksum field for each protocol so understood.*/
4219 /* If support for making other changes to a protocol header is required,    */
4220 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4221 /* TCP down to a specific value, then do it from here.                      */
4222 /* ------------------------------------------------------------------------ */
4223 u_short *nat_proto(fin, nat, nflags)
4224 fr_info_t *fin;
4225 nat_t *nat;
4226 u_int nflags;
4227 {
4228 	icmphdr_t *icmp;
4229 	u_short *csump;
4230 	tcphdr_t *tcp;
4231 	udphdr_t *udp;
4232 
4233 	csump = NULL;
4234 	if (fin->fin_out == 0) {
4235 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4236 	} else {
4237 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4238 	}
4239 
4240 	switch (fin->fin_p)
4241 	{
4242 	case IPPROTO_TCP :
4243 		tcp = fin->fin_dp;
4244 
4245 		csump = &tcp->th_sum;
4246 
4247 		/*
4248 		 * Do a MSS CLAMPING on a SYN packet,
4249 		 * only deal IPv4 for now.
4250 		 */
4251 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4252 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4253 
4254 		break;
4255 
4256 	case IPPROTO_UDP :
4257 		udp = fin->fin_dp;
4258 
4259 		if (udp->uh_sum)
4260 			csump = &udp->uh_sum;
4261 		break;
4262 
4263 	case IPPROTO_ICMP :
4264 		icmp = fin->fin_dp;
4265 
4266 		if ((nflags & IPN_ICMPQUERY) != 0) {
4267 			if (icmp->icmp_cksum != 0)
4268 				csump = &icmp->icmp_cksum;
4269 		}
4270 		break;
4271 	}
4272 	return csump;
4273 }
4274 
4275 
4276 /* ------------------------------------------------------------------------ */
4277 /* Function:    fr_natunload                                                */
4278 /* Returns:     Nil                                                         */
4279 /* Parameters:  Nil                                                         */
4280 /*                                                                          */
4281 /* Free all memory used by NAT structures allocated at runtime.             */
4282 /* ------------------------------------------------------------------------ */
4283 void fr_natunload(ifs)
4284 ipf_stack_t *ifs;
4285 {
4286 	ipftq_t *ifq, *ifqnext;
4287 
4288 	(void) nat_clearlist(ifs);
4289 	(void) nat_flushtable(ifs);
4290 
4291 	/*
4292 	 * Proxy timeout queues are not cleaned here because although they
4293 	 * exist on the NAT list, appr_unload is called after fr_natunload
4294 	 * and the proxies actually are responsible for them being created.
4295 	 * Should the proxy timeouts have their own list?  There's no real
4296 	 * justification as this is the only complication.
4297 	 */
4298 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4299 		ifqnext = ifq->ifq_next;
4300 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4301 		    (fr_deletetimeoutqueue(ifq) == 0))
4302 			fr_freetimeoutqueue(ifq, ifs);
4303 	}
4304 
4305 	if (ifs->ifs_nat_table[0] != NULL) {
4306 		KFREES(ifs->ifs_nat_table[0],
4307 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4308 		ifs->ifs_nat_table[0] = NULL;
4309 	}
4310 	if (ifs->ifs_nat_table[1] != NULL) {
4311 		KFREES(ifs->ifs_nat_table[1],
4312 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4313 		ifs->ifs_nat_table[1] = NULL;
4314 	}
4315 	if (ifs->ifs_nat_rules != NULL) {
4316 		KFREES(ifs->ifs_nat_rules,
4317 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4318 		ifs->ifs_nat_rules = NULL;
4319 	}
4320 	if (ifs->ifs_rdr_rules != NULL) {
4321 		KFREES(ifs->ifs_rdr_rules,
4322 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4323 		ifs->ifs_rdr_rules = NULL;
4324 	}
4325 	if (ifs->ifs_maptable != NULL) {
4326 		KFREES(ifs->ifs_maptable,
4327 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4328 		ifs->ifs_maptable = NULL;
4329 	}
4330 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4331 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4332 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4333 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4334 	}
4335 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4336 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4337 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4338 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4339 	}
4340 
4341 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4342 		ifs->ifs_fr_nat_maxbucket = 0;
4343 
4344 	if (ifs->ifs_fr_nat_init == 1) {
4345 		ifs->ifs_fr_nat_init = 0;
4346 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4347 
4348 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4349 		RW_DESTROY(&ifs->ifs_ipf_nat);
4350 
4351 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4352 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4353 
4354 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4355 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4356 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4357 	}
4358 }
4359 
4360 
4361 /* ------------------------------------------------------------------------ */
4362 /* Function:    fr_natexpire                                                */
4363 /* Returns:     Nil                                                         */
4364 /* Parameters:  Nil                                                         */
4365 /*                                                                          */
4366 /* Check all of the timeout queues for entries at the top which need to be  */
4367 /* expired.                                                                 */
4368 /* ------------------------------------------------------------------------ */
4369 void fr_natexpire(ifs)
4370 ipf_stack_t *ifs;
4371 {
4372 	ipftq_t *ifq, *ifqnext;
4373 	ipftqent_t *tqe, *tqn;
4374 	int i;
4375 	SPL_INT(s);
4376 
4377 	SPL_NET(s);
4378 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4379 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4380 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4381 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4382 				break;
4383 			tqn = tqe->tqe_next;
4384 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4385 		}
4386 	}
4387 
4388 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4389 		ifqnext = ifq->ifq_next;
4390 
4391 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4392 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4393 				break;
4394 			tqn = tqe->tqe_next;
4395 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4396 		}
4397 	}
4398 
4399 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4400 		ifqnext = ifq->ifq_next;
4401 
4402 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4403 		    (ifq->ifq_ref == 0)) {
4404 			fr_freetimeoutqueue(ifq, ifs);
4405 		}
4406 	}
4407 
4408 	if (ifs->ifs_nat_doflush != 0) {
4409 		(void) nat_extraflush(2, ifs);
4410 		ifs->ifs_nat_doflush = 0;
4411 	}
4412 
4413 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4414 	SPL_X(s);
4415 }
4416 
4417 
4418 /* ------------------------------------------------------------------------ */
4419 /* Function:    fr_nataddrsync                                              */
4420 /* Returns:     Nil                                                         */
4421 /* Parameters:  ifp(I) -  pointer to network interface                      */
4422 /*              addr(I) - pointer to new network address                    */
4423 /*                                                                          */
4424 /* Walk through all of the currently active NAT sessions, looking for those */
4425 /* which need to have their translated address updated (where the interface */
4426 /* matches the one passed in) and change it, recalculating the checksum sum */
4427 /* difference too.                                                          */
4428 /* ------------------------------------------------------------------------ */
4429 void fr_nataddrsync(ifp, addr, ifs)
4430 void *ifp;
4431 struct in_addr *addr;
4432 ipf_stack_t *ifs;
4433 {
4434 	u_32_t sum1, sum2, sumd;
4435 	nat_t *nat;
4436 	ipnat_t *np;
4437 	SPL_INT(s);
4438 
4439 	if (ifs->ifs_fr_running <= 0)
4440 		return;
4441 
4442 	SPL_NET(s);
4443 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4444 
4445 	if (ifs->ifs_fr_running <= 0) {
4446 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4447 		return;
4448 	}
4449 
4450 	/*
4451 	 * Change IP addresses for NAT sessions for any protocol except TCP
4452 	 * since it will break the TCP connection anyway.  The only rules
4453 	 * which will get changed are those which are "map ... -> 0/32",
4454 	 * where the rule specifies the address is taken from the interface.
4455 	 */
4456 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4457 		if (addr != NULL) {
4458 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4459 			    ((nat->nat_flags & IPN_TCP) != 0))
4460 				continue;
4461 			if (((np = nat->nat_ptr) == NULL) ||
4462 			    (np->in_nip || (np->in_outmsk != 0xffffffff)))
4463 				continue;
4464 
4465 			/*
4466 			 * Change the map-to address to be the same as the
4467 			 * new one.
4468 			 */
4469 			sum1 = nat->nat_outip.s_addr;
4470 			nat->nat_outip = *addr;
4471 			sum2 = nat->nat_outip.s_addr;
4472 
4473 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4474 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
4475 		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
4476 			struct in_addr in;
4477 
4478 			/*
4479 			 * Change the map-to address to be the same as the
4480 			 * new one.
4481 			 */
4482 			sum1 = nat->nat_outip.s_addr;
4483 			if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4484 				       &in, NULL, ifs) != -1)
4485 				nat->nat_outip = in;
4486 			sum2 = nat->nat_outip.s_addr;
4487 		} else {
4488 			continue;
4489 		}
4490 
4491 		if (sum1 == sum2)
4492 			continue;
4493 		/*
4494 		 * Readjust the checksum adjustment to take into
4495 		 * account the new IP#.
4496 		 */
4497 		CALC_SUMD(sum1, sum2, sumd);
4498 		/* XXX - dont change for TCP when solaris does
4499 		 * hardware checksumming.
4500 		 */
4501 		sumd += nat->nat_sumd[0];
4502 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4503 		nat->nat_sumd[1] = nat->nat_sumd[0];
4504 	}
4505 
4506 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4507 	SPL_X(s);
4508 }
4509 
4510 
4511 /* ------------------------------------------------------------------------ */
4512 /* Function:    fr_natifpsync                                               */
4513 /* Returns:     Nil                                                         */
4514 /* Parameters:  action(I) - how we are syncing                              */
4515 /*              ifp(I)    - pointer to network interface                    */
4516 /*              name(I)   - name of interface to sync to                    */
4517 /*                                                                          */
4518 /* This function is used to resync the mapping of interface names and their */
4519 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4520 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4521 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4522 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4523 /* there is no longer any interface associated with it.                     */
4524 /* ------------------------------------------------------------------------ */
4525 void fr_natifpsync(action, ifp, name, ifs)
4526 int action;
4527 void *ifp;
4528 char *name;
4529 ipf_stack_t *ifs;
4530 {
4531 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4532 	int s;
4533 #endif
4534 	nat_t *nat;
4535 	ipnat_t *n;
4536 
4537 	if (ifs->ifs_fr_running <= 0)
4538 		return;
4539 
4540 	SPL_NET(s);
4541 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4542 
4543 	if (ifs->ifs_fr_running <= 0) {
4544 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4545 		return;
4546 	}
4547 
4548 	switch (action)
4549 	{
4550 	case IPFSYNC_RESYNC :
4551 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4552 			if ((ifp == nat->nat_ifps[0]) ||
4553 			    (nat->nat_ifps[0] == (void *)-1)) {
4554 				nat->nat_ifps[0] =
4555 				    fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
4556 			}
4557 
4558 			if ((ifp == nat->nat_ifps[1]) ||
4559 			    (nat->nat_ifps[1] == (void *)-1)) {
4560 				nat->nat_ifps[1] =
4561 				    fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
4562 			}
4563 		}
4564 
4565 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4566 			if (n->in_ifps[0] == ifp ||
4567 			    n->in_ifps[0] == (void *)-1) {
4568 				n->in_ifps[0] =
4569 				    fr_resolvenic(n->in_ifnames[0], 4, ifs);
4570 			}
4571 			if (n->in_ifps[1] == ifp ||
4572 			    n->in_ifps[1] == (void *)-1) {
4573 				n->in_ifps[1] =
4574 				    fr_resolvenic(n->in_ifnames[1], 4, ifs);
4575 			}
4576 		}
4577 		break;
4578 	case IPFSYNC_NEWIFP :
4579 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4580 			if (!strncmp(name, nat->nat_ifnames[0],
4581 				     sizeof(nat->nat_ifnames[0])))
4582 				nat->nat_ifps[0] = ifp;
4583 			if (!strncmp(name, nat->nat_ifnames[1],
4584 				     sizeof(nat->nat_ifnames[1])))
4585 				nat->nat_ifps[1] = ifp;
4586 		}
4587 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4588 			if (!strncmp(name, n->in_ifnames[0],
4589 				     sizeof(n->in_ifnames[0])))
4590 				n->in_ifps[0] = ifp;
4591 			if (!strncmp(name, n->in_ifnames[1],
4592 				     sizeof(n->in_ifnames[1])))
4593 				n->in_ifps[1] = ifp;
4594 		}
4595 		break;
4596 	case IPFSYNC_OLDIFP :
4597 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4598 			if (ifp == nat->nat_ifps[0])
4599 				nat->nat_ifps[0] = (void *)-1;
4600 			if (ifp == nat->nat_ifps[1])
4601 				nat->nat_ifps[1] = (void *)-1;
4602 		}
4603 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4604 			if (n->in_ifps[0] == ifp)
4605 				n->in_ifps[0] = (void *)-1;
4606 			if (n->in_ifps[1] == ifp)
4607 				n->in_ifps[1] = (void *)-1;
4608 		}
4609 		break;
4610 	}
4611 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4612 	SPL_X(s);
4613 }
4614 
4615 
/* ------------------------------------------------------------------------ */
/* Function:    nat_icmpquerytype4                                          */
/* Returns:     int - 1 == query or reply type, 0 == anything else          */
/* Parameters:  icmptype(I) - ICMP type number                              */
/*                                                                          */
/* Reports whether the given ICMP type is one of the query or reply types   */
/* handled by the ICMP query NAT code.                                      */
/* ------------------------------------------------------------------------ */
static INLINE int nat_icmpquerytype4(icmptype)
int icmptype;
{
	/*
	 * For the ICMP query NAT code, a query and its reply must both match
	 * on the NAT rule.  The NAT structure does not keep track of the
	 * icmptype, and a single NAT structure is used for all icmp types
	 * with the same src, dest and id, so the reply types are deliberately
	 * reported as query types as well.
	 *
	 * Route advertisement/solicitation is currently unsupported: it
	 * would require rewriting the ICMP data section.
	 */
	if ((icmptype == ICMP_ECHO) || (icmptype == ICMP_ECHOREPLY))
		return 1;
	if ((icmptype == ICMP_TSTAMP) || (icmptype == ICMP_TSTAMPREPLY))
		return 1;
	if ((icmptype == ICMP_IREQ) || (icmptype == ICMP_IREQREPLY))
		return 1;
	if ((icmptype == ICMP_MASKREQ) || (icmptype == ICMP_MASKREPLY))
		return 1;

	return 0;
}
4656 
4657 
4658 /* ------------------------------------------------------------------------ */
4659 /* Function:    nat_log                                                     */
4660 /* Returns:     Nil                                                         */
4661 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4662 /*              type(I) - type of log entry to create                       */
4663 /*                                                                          */
4664 /* Creates a NAT log entry.                                                 */
4665 /* ------------------------------------------------------------------------ */
4666 void nat_log(nat, type, ifs)
4667 struct nat *nat;
4668 u_int type;
4669 ipf_stack_t *ifs;
4670 {
4671 #ifdef	IPFILTER_LOG
4672 # ifndef LARGE_NAT
4673 	struct ipnat *np;
4674 	int rulen;
4675 # endif
4676 	struct natlog natl;
4677 	void *items[1];
4678 	size_t sizes[1];
4679 	int types[1];
4680 
4681 	natl.nl_inip = nat->nat_inip;
4682 	natl.nl_outip = nat->nat_outip;
4683 	natl.nl_origip = nat->nat_oip;
4684 	natl.nl_bytes[0] = nat->nat_bytes[0];
4685 	natl.nl_bytes[1] = nat->nat_bytes[1];
4686 	natl.nl_pkts[0] = nat->nat_pkts[0];
4687 	natl.nl_pkts[1] = nat->nat_pkts[1];
4688 	natl.nl_origport = nat->nat_oport;
4689 	natl.nl_inport = nat->nat_inport;
4690 	natl.nl_outport = nat->nat_outport;
4691 	natl.nl_p = nat->nat_p;
4692 	natl.nl_type = type;
4693 	natl.nl_rule = -1;
4694 # ifndef LARGE_NAT
4695 	if (nat->nat_ptr != NULL) {
4696 		for (rulen = 0, np = ifs->ifs_nat_list; np;
4697 		     np = np->in_next, rulen++)
4698 			if (np == nat->nat_ptr) {
4699 				natl.nl_rule = rulen;
4700 				break;
4701 			}
4702 	}
4703 # endif
4704 	items[0] = &natl;
4705 	sizes[0] = sizeof(natl);
4706 	types[0] = 0;
4707 
4708 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
4709 #endif
4710 }
4711 
4712 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - ipf stack instance                                 */
/*                                                                          */
/* OpenBSD compatibility hook: forwards an interface detach to frsync() so  */
/* that interface references within IPFilter get updated.                   */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	frsync(ifp, ifs);
}
#endif
4730 
4731 
4732 /* ------------------------------------------------------------------------ */
4733 /* Function:    fr_ipnatderef                                               */
4734 /* Returns:     Nil                                                         */
4735 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4736 /* Write Locks: ipf_nat                                                     */
4737 /*                                                                          */
4738 /* ------------------------------------------------------------------------ */
4739 void fr_ipnatderef(inp, ifs)
4740 ipnat_t **inp;
4741 ipf_stack_t *ifs;
4742 {
4743 	ipnat_t *in;
4744 
4745 	in = *inp;
4746 	*inp = NULL;
4747 	in->in_space++;
4748 	in->in_use--;
4749 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4750 		if (in->in_apr)
4751 			appr_free(in->in_apr);
4752 		KFREE(in);
4753 		ifs->ifs_nat_stats.ns_rules--;
4754 #ifdef notdef
4755 #if SOLARIS
4756 		if (ifs->ifs_nat_stats.ns_rules == 0)
4757 			ifs->ifs_pfil_delayed_copy = 1;
4758 #endif
4759 #endif
4760 	}
4761 }
4762 
4763 
4764 /* ------------------------------------------------------------------------ */
4765 /* Function:    fr_natderef                                                 */
4766 /* Returns:     Nil                                                         */
4767 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4768 /*                                                                          */
4769 /* Decrement the reference counter for this NAT table entry and free it if  */
4770 /* there are no more things using it.                                       */
4771 /*                                                                          */
4772 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4773 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4774 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4775 /* because nat_delete() will do that and send nat_ref to -1.                */
4776 /*                                                                          */
4777 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4778 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4779 /* ------------------------------------------------------------------------ */
4780 void fr_natderef(natp, ifs)
4781 nat_t **natp;
4782 ipf_stack_t *ifs;
4783 {
4784 	nat_t *nat;
4785 
4786 	nat = *natp;
4787 	*natp = NULL;
4788 
4789 	MUTEX_ENTER(&nat->nat_lock);
4790 	if (nat->nat_ref > 1) {
4791 		nat->nat_ref--;
4792 		MUTEX_EXIT(&nat->nat_lock);
4793 		return;
4794 	}
4795 	MUTEX_EXIT(&nat->nat_lock);
4796 
4797 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4798 	nat_delete(nat, NL_EXPIRE, ifs);
4799 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4800 }
4801 
4802 
4803 /* ------------------------------------------------------------------------ */
4804 /* Function:    fr_natclone                                                 */
4805 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4806 /*                           else pointer to new state structure            */
4807 /* Parameters:  fin(I) - pointer to packet information                      */
4808 /*              is(I)  - pointer to master state structure                  */
4809 /* Write Lock:  ipf_nat                                                     */
4810 /*                                                                          */
4811 /* Create a "duplcate" state table entry from the master.                   */
4812 /* ------------------------------------------------------------------------ */
4813 static nat_t *fr_natclone(fin, nat)
4814 fr_info_t *fin;
4815 nat_t *nat;
4816 {
4817 	frentry_t *fr;
4818 	nat_t *clone;
4819 	ipnat_t *np;
4820 	ipf_stack_t *ifs = fin->fin_ifs;
4821 
4822 	KMALLOC(clone, nat_t *);
4823 	if (clone == NULL)
4824 		return NULL;
4825 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4826 
4827 	MUTEX_NUKE(&clone->nat_lock);
4828 
4829 	clone->nat_aps = NULL;
4830 	/*
4831 	 * Initialize all these so that nat_delete() doesn't cause a crash.
4832 	 */
4833 	clone->nat_tqe.tqe_pnext = NULL;
4834 	clone->nat_tqe.tqe_next = NULL;
4835 	clone->nat_tqe.tqe_ifq = NULL;
4836 	clone->nat_tqe.tqe_parent = clone;
4837 
4838 	clone->nat_flags &= ~SI_CLONE;
4839 	clone->nat_flags |= SI_CLONED;
4840 
4841 	if (clone->nat_hm)
4842 		clone->nat_hm->hm_ref++;
4843 
4844 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
4845 		KFREE(clone);
4846 		return NULL;
4847 	}
4848 	np = clone->nat_ptr;
4849 	if (np != NULL) {
4850 		if (ifs->ifs_nat_logging)
4851 			nat_log(clone, (u_int)np->in_redir, ifs);
4852 		np->in_use++;
4853 	}
4854 	fr = clone->nat_fr;
4855 	if (fr != NULL) {
4856 		MUTEX_ENTER(&fr->fr_lock);
4857 		fr->fr_ref++;
4858 		MUTEX_EXIT(&fr->fr_lock);
4859 	}
4860 
4861 	/*
4862 	 * Because the clone is created outside the normal loop of things and
4863 	 * TCP has special needs in terms of state, initialise the timeout
4864 	 * state of the new NAT from here.
4865 	 */
4866 	if (clone->nat_p == IPPROTO_TCP) {
4867 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
4868 				  clone->nat_flags);
4869 	}
4870 #ifdef	IPFILTER_SYNC
4871 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4872 #endif
4873 	if (ifs->ifs_nat_logging)
4874 		nat_log(clone, NL_CLONE, ifs);
4875 	return clone;
4876 }
4877 
4878 
4879 /* ------------------------------------------------------------------------ */
4880 /* Function:   nat_wildok                                                   */
4881 /* Returns:    int - 1 == packet's ports match wildcards                    */
4882 /*                   0 == packet's ports don't match wildcards              */
4883 /* Parameters: nat(I)   - NAT entry                                         */
4884 /*             sport(I) - source port                                       */
4885 /*             dport(I) - destination port                                  */
4886 /*             flags(I) - wildcard flags                                    */
4887 /*             dir(I)   - packet direction                                  */
4888 /*                                                                          */
4889 /* Use NAT entry and packet direction to determine which combination of     */
4890 /* wildcard flags should be used.                                           */
4891 /* ------------------------------------------------------------------------ */
4892 static INLINE int nat_wildok(nat, sport, dport, flags, dir)
4893 nat_t *nat;
4894 int sport;
4895 int dport;
4896 int flags;
4897 int dir;
4898 {
4899 	/*
4900 	 * When called by       dir is set to
4901 	 * nat_inlookup         NAT_INBOUND (0)
4902 	 * nat_outlookup        NAT_OUTBOUND (1)
4903 	 *
4904 	 * We simply combine the packet's direction in dir with the original
4905 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4906 	 * which combination of wildcard flags to allow.
4907 	 */
4908 
4909 	switch ((dir << 1) | nat->nat_dir)
4910 	{
4911 	case 3: /* outbound packet / outbound entry */
4912 		if (((nat->nat_inport == sport) ||
4913 		    (flags & SI_W_SPORT)) &&
4914 		    ((nat->nat_oport == dport) ||
4915 		    (flags & SI_W_DPORT)))
4916 			return 1;
4917 		break;
4918 	case 2: /* outbound packet / inbound entry */
4919 		if (((nat->nat_outport == sport) ||
4920 		    (flags & SI_W_DPORT)) &&
4921 		    ((nat->nat_oport == dport) ||
4922 		    (flags & SI_W_SPORT)))
4923 			return 1;
4924 		break;
4925 	case 1: /* inbound packet / outbound entry */
4926 		if (((nat->nat_oport == sport) ||
4927 		    (flags & SI_W_DPORT)) &&
4928 		    ((nat->nat_outport == dport) ||
4929 		    (flags & SI_W_SPORT)))
4930 			return 1;
4931 		break;
4932 	case 0: /* inbound packet / inbound entry */
4933 		if (((nat->nat_oport == sport) ||
4934 		    (flags & SI_W_SPORT)) &&
4935 		    ((nat->nat_outport == dport) ||
4936 		    (flags & SI_W_DPORT)))
4937 			return 1;
4938 		break;
4939 	default:
4940 		break;
4941 	}
4942 
4943 	return(0);
4944 }
4945 
4946 
4947 /* ------------------------------------------------------------------------ */
4948 /* Function:    nat_mssclamp                                                */
4949 /* Returns:     Nil                                                         */
4950 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4951 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4952 /*              csump(I)  - pointer to TCP checksum                         */
4953 /*                                                                          */
4954 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4955 /* then the TCP header checksum will be updated to reflect the change in    */
4956 /* the MSS.                                                                 */
4957 /* ------------------------------------------------------------------------ */
4958 static void nat_mssclamp(tcp, maxmss, csump)
4959 tcphdr_t *tcp;
4960 u_32_t maxmss;
4961 u_short *csump;
4962 {
4963 	u_char *cp, *ep, opt;
4964 	int hlen, advance;
4965 	u_32_t mss, sumd;
4966 
4967 	hlen = TCP_OFF(tcp) << 2;
4968 	if (hlen > sizeof(*tcp)) {
4969 		cp = (u_char *)tcp + sizeof(*tcp);
4970 		ep = (u_char *)tcp + hlen;
4971 
4972 		while (cp < ep) {
4973 			opt = cp[0];
4974 			if (opt == TCPOPT_EOL)
4975 				break;
4976 			else if (opt == TCPOPT_NOP) {
4977 				cp++;
4978 				continue;
4979 			}
4980 
4981 			if (cp + 1 >= ep)
4982 				break;
4983 			advance = cp[1];
4984 			if ((cp + advance > ep) || (advance <= 0))
4985 				break;
4986 			switch (opt)
4987 			{
4988 			case TCPOPT_MAXSEG:
4989 				if (advance != 4)
4990 					break;
4991 				mss = cp[2] * 256 + cp[3];
4992 				if (mss > maxmss) {
4993 					cp[2] = maxmss / 256;
4994 					cp[3] = maxmss & 0xff;
4995 					CALC_SUMD(mss, maxmss, sumd);
4996 					fix_outcksum(csump, sumd);
4997 				}
4998 				break;
4999 			default:
5000 				/* ignore unknown options */
5001 				break;
5002 			}
5003 
5004 			cp += advance;
5005 		}
5006 	}
5007 }
5008 
5009 
5010 /* ------------------------------------------------------------------------ */
5011 /* Function:    fr_setnatqueue                                              */
5012 /* Returns:     Nil                                                         */
5013 /* Parameters:  nat(I)- pointer to NAT structure                            */
5014 /*              rev(I) - forward(0) or reverse(1) direction                 */
5015 /* Locks:       ipf_nat (read or write)                                     */
5016 /*                                                                          */
5017 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5018 /* determining which queue it should be placed on.                          */
5019 /* ------------------------------------------------------------------------ */
5020 void fr_setnatqueue(nat, rev, ifs)
5021 nat_t *nat;
5022 int rev;
5023 ipf_stack_t *ifs;
5024 {
5025 	ipftq_t *oifq, *nifq;
5026 
5027 	if (nat->nat_ptr != NULL)
5028 		nifq = nat->nat_ptr->in_tqehead[rev];
5029 	else
5030 		nifq = NULL;
5031 
5032 	if (nifq == NULL) {
5033 		switch (nat->nat_p)
5034 		{
5035 		case IPPROTO_UDP :
5036 			nifq = &ifs->ifs_nat_udptq;
5037 			break;
5038 		case IPPROTO_ICMP :
5039 			nifq = &ifs->ifs_nat_icmptq;
5040 			break;
5041 		case IPPROTO_TCP :
5042 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5043 			break;
5044 		default :
5045 			nifq = &ifs->ifs_nat_iptq;
5046 			break;
5047 		}
5048 	}
5049 
5050 	oifq = nat->nat_tqe.tqe_ifq;
5051 	/*
5052 	 * If it's currently on a timeout queue, move it from one queue to
5053 	 * another, else put it on the end of the newly determined queue.
5054 	 */
5055 	if (oifq != NULL)
5056 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5057 	else
5058 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5059 	return;
5060 }
5061 
5062 /* ------------------------------------------------------------------------ */
5063 /* Function:    nat_getnext                                                 */
5064 /* Returns:     int - 0 == ok, else error                                   */
5065 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5066 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5067 /*              ifs - ipf stack instance                                    */
5068 /*                                                                          */
5069 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5070 /* and copy it out to the storage space pointed to by itp.  The next item   */
5071 /* in the list to look at is put back in the ipftoken struture.             */
5072 /* ------------------------------------------------------------------------ */
5073 static int nat_getnext(t, itp, ifs)
5074 ipftoken_t *t;
5075 ipfgeniter_t *itp;
5076 ipf_stack_t *ifs;
5077 {
5078 	hostmap_t *hm, *nexthm = NULL, zerohm;
5079 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5080 	nat_t *nat, *nextnat = NULL, zeronat;
5081 	int error = 0, count;
5082 	char *dst;
5083 
5084 	if (itp->igi_nitems == 0)
5085 		return EINVAL;
5086 
5087 	READ_ENTER(&ifs->ifs_ipf_nat);
5088 
5089 	switch (itp->igi_type)
5090 	{
5091 	case IPFGENITER_HOSTMAP :
5092 		hm = t->ipt_data;
5093 		if (hm == NULL) {
5094 			nexthm = ifs->ifs_ipf_hm_maplist;
5095 		} else {
5096 			nexthm = hm->hm_next;
5097 		}
5098 		break;
5099 
5100 	case IPFGENITER_IPNAT :
5101 		ipn = t->ipt_data;
5102 		if (ipn == NULL) {
5103 			nextipnat = ifs->ifs_nat_list;
5104 		} else {
5105 			nextipnat = ipn->in_next;
5106 		}
5107 		break;
5108 
5109 	case IPFGENITER_NAT :
5110 		nat = t->ipt_data;
5111 		if (nat == NULL) {
5112 			nextnat = ifs->ifs_nat_instances;
5113 		} else {
5114 			nextnat = nat->nat_next;
5115 		}
5116 		break;
5117 	default :
5118 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5119 		return EINVAL;
5120 	}
5121 
5122 	dst = itp->igi_data;
5123 	for (count = itp->igi_nitems; count > 0; count--) {
5124 		switch (itp->igi_type)
5125 		{
5126 		case IPFGENITER_HOSTMAP :
5127 			if (nexthm != NULL) {
5128 				ATOMIC_INC32(nexthm->hm_ref);
5129 				t->ipt_data = nexthm;
5130 			} else {
5131 				bzero(&zerohm, sizeof(zerohm));
5132 				nexthm = &zerohm;
5133 				count = 1;
5134 				t->ipt_data = NULL;
5135 			}
5136 			break;
5137 		case IPFGENITER_IPNAT :
5138 			if (nextipnat != NULL) {
5139 				ATOMIC_INC32(nextipnat->in_use);
5140 				t->ipt_data = nextipnat;
5141 			} else {
5142 				bzero(&zeroipn, sizeof(zeroipn));
5143 				nextipnat = &zeroipn;
5144 				count = 1;
5145 				t->ipt_data = NULL;
5146 			}
5147 			break;
5148 		case IPFGENITER_NAT :
5149 			if (nextnat != NULL) {
5150 				MUTEX_ENTER(&nextnat->nat_lock);
5151 				nextnat->nat_ref++;
5152 				MUTEX_EXIT(&nextnat->nat_lock);
5153 				t->ipt_data = nextnat;
5154 			} else {
5155 				bzero(&zeronat, sizeof(zeronat));
5156 				nextnat = &zeronat;
5157 				count = 1;
5158 				t->ipt_data = NULL;
5159 			}
5160 			break;
5161 		default :
5162 			break;
5163 		}
5164 
5165 		/*
5166 		 * We can safely release our hold on ipf_nat.
5167 		 */
5168 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5169 
5170 		switch (itp->igi_type)
5171 		{
5172 		case IPFGENITER_HOSTMAP :
5173 			if (hm != NULL) {
5174 				WRITE_ENTER(&ifs->ifs_ipf_nat);
5175 				fr_hostmapdel(&hm);
5176 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5177 			}
5178 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5179 			if (error != 0) {
5180 				error = EFAULT;
5181 			} else {
5182 				dst += sizeof(*nexthm);
5183 				hm = nexthm;
5184 				nexthm = nexthm->hm_next;
5185 			}
5186 			break;
5187 		case IPFGENITER_IPNAT :
5188 			if (ipn != NULL) {
5189 				WRITE_ENTER(&ifs->ifs_ipf_nat);
5190 				fr_ipnatderef(&ipn, ifs);
5191 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5192 			}
5193 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5194 			if (error != 0) {
5195 				error = EFAULT;
5196 			} else {
5197 				dst += sizeof(*nextipnat);
5198 				ipn = nextipnat;
5199 				nextipnat = nextipnat->in_next;
5200 			}
5201 			break;
5202 		case IPFGENITER_NAT :
5203 			if (nat != NULL) {
5204 				fr_natderef(&nat, ifs);
5205 			}
5206 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5207 			if (error != 0) {
5208 				error = EFAULT;
5209 			} else {
5210 				dst += sizeof(*nextnat);
5211 				nat = nextnat;
5212 				nextnat = nextnat->nat_next;
5213 			}
5214 			break;
5215 		default :
5216 			break;
5217 		}
5218 
5219 		if ((count == 1) || (error != 0))
5220 			break;
5221 
5222 		READ_ENTER(&ifs->ifs_ipf_nat);
5223 	}
5224 
5225 	return error;
5226 }
5227 
5228 
5229 /* ------------------------------------------------------------------------ */
5230 /* Function:    nat_iterator                                                */
5231 /* Returns:     int - 0 == ok, else error                                   */
5232 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5233 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5234 /*                                                                          */
5235 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5236 /* generic structure to iterate through a list.  There are three different  */
5237 /* linked lists of NAT related information to go through: NAT rules, active */
5238 /* NAT mappings and the NAT fragment cache.                                 */
5239 /* ------------------------------------------------------------------------ */
5240 static int nat_iterator(token, itp, ifs)
5241 ipftoken_t *token;
5242 ipfgeniter_t *itp;
5243 ipf_stack_t *ifs;
5244 {
5245 	int error;
5246 
5247 	if (itp->igi_data == NULL)
5248 		return EFAULT;
5249 
5250 	token->ipt_subtype = itp->igi_type;
5251 
5252 	switch (itp->igi_type)
5253 	{
5254 	case IPFGENITER_HOSTMAP :
5255 	case IPFGENITER_IPNAT :
5256 	case IPFGENITER_NAT :
5257 		error = nat_getnext(token, itp, ifs);
5258 		break;
5259 	case IPFGENITER_NATFRAG :
5260 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5261 				    &ifs->ifs_ipfr_nattail,
5262 				    &ifs->ifs_ipf_natfrag, ifs);
5263 		break;
5264 	default :
5265 		error = EINVAL;
5266 		break;
5267 	}
5268 
5269 	return error;
5270 }
5271 
5272 
5273 /* -------------------------------------------------------------------- */
5274 /* Function:	nat_earlydrop						*/
5275 /* Returns:	number of dropped/removed entries from the queue	*/
5276 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5277 /*		maxidle - entry must be idle this long to be dropped	*/
5278 /*		ifs - ipf stack instance				*/
5279 /*									*/
5280 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5281 /* form specified timeout queue, based on how long they've sat idle,	*/
5282 /* without waiting for it to happen on its own.				*/
5283 /* -------------------------------------------------------------------- */
5284 static int nat_earlydrop(ifq, maxidle, ifs)
5285 ipftq_t *ifq;
5286 int maxidle;
5287 ipf_stack_t *ifs;
5288 {
5289 	ipftqent_t *tqe, *tqn;
5290 	nat_t *nat;
5291 	unsigned int dropped;
5292 	int droptick;
5293 
5294 	if (ifq == NULL)
5295 		return (0);
5296 
5297 	dropped = 0;
5298 
5299 	/*
5300 	 * Determine the tick representing the idle time we're interested
5301 	 * in.  If an entry exists in the queue, and it was touched before
5302 	 * that tick, then it's been idle longer than maxidle ... remove it.
5303 	 */
5304 	droptick = ifs->ifs_fr_ticks - maxidle;
5305 	tqn = ifq->ifq_head;
5306 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5307 		tqn = tqe->tqe_next;
5308 		nat = tqe->tqe_parent;
5309 		nat_delete(nat, ISL_EXPIRE, ifs);
5310 		dropped++;
5311 	}
5312 	return (dropped);
5313 }
5314 
5315 
5316 /* --------------------------------------------------------------------- */
5317 /* Function:	nat_flushclosing					 */
5318 /* Returns:	int - number of NAT entries deleted			 */
5319 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5320 /*		ifs - ipf stack instance				 */
5321 /*									 */
5322 /* Remove nat table entries for TCP connections which are in the process */
5323 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5324 /* --------------------------------------------------------------------- */
5325 static int nat_flushclosing(stateval, ifs)
5326 int stateval;
5327 ipf_stack_t *ifs;
5328 {
5329 	ipftq_t *ifq, *ifqn;
5330 	ipftqent_t *tqe, *tqn;
5331 	nat_t *nat;
5332 	int dropped;
5333 
5334 	dropped = 0;
5335 
5336 	/*
5337 	 * Start by deleting any entries in specific timeout queues.
5338 	 */
5339 	ifqn = &ifs->ifs_nat_tqb[stateval];
5340 	while ((ifq = ifqn) != NULL) {
5341 		ifqn = ifq->ifq_next;
5342 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5343 	}
5344 
5345 	/*
5346 	 * Next, look through user defined queues for closing entries.
5347 	 */
5348 	ifqn = ifs->ifs_nat_utqe;
5349 	while ((ifq = ifqn) != NULL) {
5350 		ifqn = ifq->ifq_next;
5351 		tqn = ifq->ifq_head;
5352 		while ((tqe = tqn) != NULL) {
5353 			tqn = tqe->tqe_next;
5354 			nat = tqe->tqe_parent;
5355 			if (nat->nat_p != IPPROTO_TCP)
5356 				continue;
5357 			if ((nat->nat_tcpstate[0] >= stateval) &&
5358 			    (nat->nat_tcpstate[1] >= stateval)) {
5359 				nat_delete(nat, NL_EXPIRE, ifs);
5360 				dropped++;
5361 			}
5362 		}
5363 	}
5364 	return (dropped);
5365 }
5366 
5367 
5368 /* --------------------------------------------------------------------- */
5369 /* Function:	nat_extraflush						 */
5370 /* Returns:	int - number of NAT entries deleted			 */
5371 /* Parameters:	which(I) - how to flush the active NAT table		 */
5372 /*		ifs - ipf stack instance				 */
5373 /* Write Locks:	ipf_nat							 */
5374 /*									 */
5375 /* Flush nat tables.  Three actions currently defined:			 */
5376 /*									 */
5377 /* which == 0 :	Flush all nat table entries.				 */
5378 /*									 */
5379 /* which == 1 :	Flush entries with TCP connections which have started	 */
5380 /*		to close on both ends.					 */
5381 /*									 */
5382 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5383 /*		does not take us below specified threshold in the table, */
5384 /*		we want to flush entries with TCP connections which have */
5385 /*		been idle for a long time.  Start with connections idle	 */
5386 /*		over 12 hours,  and then work backwards in half hour	 */
5387 /*		increments to at most 30 minutes idle, and finally work	 */
5388 /*		back in 30 second increments to at most 30 seconds.	 */
5389 /* --------------------------------------------------------------------- */
5390 static int nat_extraflush(which, ifs)
5391 int which;
5392 ipf_stack_t *ifs;
5393 {
5394 	ipftq_t *ifq, *ifqn;
5395 	nat_t *nat, **natp;
5396 	int idletime, removed, idle_idx;
5397 	SPL_INT(s);
5398 
5399 	removed = 0;
5400 
5401 	SPL_NET(s);
5402 	switch (which)
5403 	{
5404 	case 0:
5405 		natp = &ifs->ifs_nat_instances;
5406 		while ((nat = *natp) != NULL) {
5407 			natp = &nat->nat_next;
5408 			nat_delete(nat, ISL_FLUSH, ifs);
5409 			removed++;
5410 		}
5411 		break;
5412 
5413 	case 1:
5414 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5415 		break;
5416 
5417 	case 2:
5418 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5419 
5420 		/*
5421 		 * Be sure we haven't done this in the last 10 seconds.
5422 		 */
5423 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5424 		    IPF_TTLVAL(10))
5425 			break;
5426 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5427 
5428 		/*
5429 		 * Determine initial threshold for minimum idle time based on
5430 		 * how long ipfilter has been running.  Ipfilter needs to have
5431 		 * been up as long as the smallest interval to continue on.
5432 		 *
5433 		 * Minimum idle times stored in idletime_tab and indexed by
5434 		 * idle_idx.  Start at upper end of array and work backwards.
5435 		 *
5436 		 * Once the index is found, set the initial idle time to the
5437 		 * first interval before the current ipfilter run time.
5438 		 */
5439 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5440 			break;  /* switch */
5441 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5442 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5443 			idletime = idletime_tab[idle_idx];
5444 		} else {
5445 			while ((idle_idx > 0) &&
5446 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5447 				idle_idx--;
5448 			idletime = (ifs->ifs_fr_ticks /
5449 				    idletime_tab[idle_idx]) *
5450 				    idletime_tab[idle_idx];
5451 		}
5452 
5453 		while ((idle_idx >= 0) &&
5454 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5455 			/*
5456 			 * Start with appropriate timeout queue.
5457 			 */
5458 			removed += nat_earlydrop(
5459 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5460 					idletime, ifs);
5461 
5462 			/*
5463 			 * Make sure we haven't already deleted enough
5464 			 * entries before checking the user defined queues.
5465 			 */
5466 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5467 			    ifs->ifs_nat_flush_lvl_lo)
5468 				break;
5469 
5470 			/*
5471 			 * Next, look through the user defined queues.
5472 			 */
5473 			ifqn = ifs->ifs_nat_utqe;
5474 			while ((ifq = ifqn) != NULL) {
5475 				ifqn = ifq->ifq_next;
5476 				removed += nat_earlydrop(ifq, idletime, ifs);
5477 			}
5478 
5479 			/*
5480 			 * Adjust the granularity of idle time.
5481 			 *
5482 			 * If we reach an interval boundary, we need to
5483 			 * either adjust the idle time accordingly or exit
5484 			 * the loop altogether (if this is very last check).
5485 			 */
5486 			idletime -= idletime_tab[idle_idx];
5487 			if (idletime < idletime_tab[idle_idx]) {
5488 				if (idle_idx != 0) {
5489 					idletime = idletime_tab[idle_idx] -
5490 					    idletime_tab[idle_idx - 1];
5491 					idle_idx--;
5492 				} else {
5493 					break;  /* while */
5494 				}
5495 			}
5496 		}
5497 		break;
5498 	default:
5499 		break;
5500 	}
5501 
5502 	SPL_X(s);
5503 	return (removed);
5504 }
5505