xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision 17977493)
1 /*
2  * Copyright (C) 1995-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"$
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 # include "opt_ipfilter_log.h"
26 #endif
27 #if !defined(_KERNEL)
28 # include <stdio.h>
29 # include <string.h>
30 # include <stdlib.h>
31 # define _KERNEL
32 # ifdef __OpenBSD__
33 struct file;
34 # endif
35 # include <sys/uio.h>
36 # undef _KERNEL
37 #endif
38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
39 # include <sys/filio.h>
40 # include <sys/fcntl.h>
41 #else
42 # include <sys/ioctl.h>
43 #endif
44 #if !defined(AIX)
45 # include <sys/fcntl.h>
46 #endif
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 #  include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 #  include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66 #if __FreeBSD_version >= 300000
67 # include <sys/queue.h>
68 #endif
69 #include <net/if.h>
70 #if __FreeBSD_version >= 300000
71 # include <net/if_var.h>
72 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
73 #  include "opt_ipfilter.h"
74 # endif
75 #endif
76 #ifdef sun
77 # include <net/af.h>
78 #endif
79 #include <net/route.h>
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 
84 #ifdef RFC1825
85 # include <vpn/md5.h>
86 # include <vpn/ipsec.h>
87 extern struct ifnet vpnif;
88 #endif
89 
90 #if !defined(linux)
91 # include <netinet/ip_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_nat.h"
100 #include "netinet/ip_frag.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ip_proxy.h"
103 #include "netinet/ipf_stack.h"
104 #ifdef	IPFILTER_SYNC
105 #include "netinet/ip_sync.h"
106 #endif
107 #if (__FreeBSD_version >= 300000)
108 # include <sys/malloc.h>
109 #endif
110 /* END OF INCLUDES */
111 
112 #undef	SOCKADDR_IN
113 #define	SOCKADDR_IN	struct sockaddr_in
114 
115 #if !defined(lint)
116 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
118 #endif
119 
120 
121 /* ======================================================================== */
122 /* How the NAT is organised and works.                                      */
123 /*                                                                          */
124 /* Inside (interface y) NAT       Outside (interface x)                     */
125 /* -------------------- -+- -------------------------------------           */
126 /* Packet going          |   out, processed by fr_checknatout() for x       */
127 /* ------------>         |   ------------>                                  */
128 /* src=10.1.1.1          |   src=192.1.1.1                                  */
129 /*                       |                                                  */
130 /*                       |   in, processed by fr_checknatin() for x         */
131 /* <------------         |   <------------                                  */
132 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
133 /* -------------------- -+- -------------------------------------           */
134 /* fr_checknatout() - changes ip_src and if required, sport                 */
135 /*             - creates a new mapping, if required.                        */
136 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
137 /*                                                                          */
138 /* In the NAT table, internal source is recorded as "in" and externally     */
139 /* seen as "out".                                                           */
140 /* ======================================================================== */
141 
142 
143 static	int	nat_flushtable __P((ipf_stack_t *));
144 static	int	nat_clearlist __P((ipf_stack_t *));
145 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
146 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
147 static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
148 static	void	nat_delrdr __P((struct ipnat *));
149 static	void	nat_delnat __P((struct ipnat *));
150 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
151 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
152 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
153 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
154 static	int	nat_match __P((fr_info_t *, ipnat_t *));
155 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
156 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
157 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
158 				    struct in_addr, struct in_addr, u_32_t,
159 				    ipf_stack_t *));
160 static	INLINE	int nat_icmpquerytype4 __P((int));
161 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int,
162 				    ipf_stack_t *));
163 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int,
164 				    ipf_stack_t *));
165 static	INLINE	int nat_icmperrortype4 __P((int));
166 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
167 				      tcphdr_t *, nat_t **, int));
168 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
169 static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
170 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
171 static	INLINE	int nat_wildok __P((nat_t *, int, int, int, int));
172 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
173 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
174 static	int	nat_extraflush __P((int, ipf_stack_t *));
175 static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
176 static	int	nat_flushclosing __P((int, ipf_stack_t *));
177 
178 
179 /*
180  * Below we declare a list of constants used only in the nat_extraflush()
181  * routine.  We are placing it here, instead of in nat_extraflush() itself,
182  * because we want to make it visible to tools such as mdb, nm etc., so the
183  * values can easily be altered during debugging.
184  */
185 static	const int	idletime_tab[] = {
186 	IPF_TTLVAL(30),		/* 30 seconds */
187 	IPF_TTLVAL(1800),	/* 30 minutes */
188 	IPF_TTLVAL(43200),	/* 12 hours */
189 	IPF_TTLVAL(345600),	/* 4 days */
190 };
191 
/*
 * Non-zero when NAT entry "n" has differing inside and outside port values
 * and is flagged with any of the IPN_TCPUDPICMP or IPN_ICMPQUERY bits.
 * Note that "n" is evaluated more than once.
 */
#define NAT_HAS_L4_CHANGED(n)	\
	(((n)->nat_inport != (n)->nat_outport) && \
	(((n)->nat_flags & (IPN_TCPUDPICMP | IPN_ICMPQUERY)) != 0))
195 
196 
197 /* ------------------------------------------------------------------------ */
198 /* Function:    fr_natinit                                                  */
199 /* Returns:     int - 0 == success, -1 == failure                           */
200 /* Parameters:  Nil                                                         */
201 /*                                                                          */
202 /* Initialise all of the NAT locks, tables and other structures.            */
203 /* ------------------------------------------------------------------------ */
204 int fr_natinit(ifs)
205 ipf_stack_t *ifs;
206 {
207 	int i;
208 
209 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
210 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
211 	if (ifs->ifs_nat_table[0] != NULL)
212 		bzero((char *)ifs->ifs_nat_table[0],
213 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
214 	else
215 		return -1;
216 
217 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
218 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
219 	if (ifs->ifs_nat_table[1] != NULL)
220 		bzero((char *)ifs->ifs_nat_table[1],
221 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
222 	else
223 		return -2;
224 
225 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
226 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
227 	if (ifs->ifs_nat_rules != NULL)
228 		bzero((char *)ifs->ifs_nat_rules,
229 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
230 	else
231 		return -3;
232 
233 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
234 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
235 	if (ifs->ifs_rdr_rules != NULL)
236 		bzero((char *)ifs->ifs_rdr_rules,
237 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
238 	else
239 		return -4;
240 
241 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
242 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
243 	if (ifs->ifs_maptable != NULL)
244 		bzero((char *)ifs->ifs_maptable,
245 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
246 	else
247 		return -5;
248 
249 	ifs->ifs_ipf_hm_maplist = NULL;
250 
251 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
252 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
253 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
254 		return -1;
255 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
256 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
257 
258 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
259 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
260 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
261 		return -1;
262 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
263 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
264 
265 	if (ifs->ifs_fr_nat_maxbucket == 0) {
266 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
267 			ifs->ifs_fr_nat_maxbucket++;
268 		ifs->ifs_fr_nat_maxbucket *= 2;
269 	}
270 
271 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
272 	/*
273 	 * Increase this because we may have "keep state" following this too
274 	 * and packet storms can occur if this is removed too quickly.
275 	 */
276 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
277 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
278 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
279 	ifs->ifs_nat_udptq.ifq_ref = 1;
280 	ifs->ifs_nat_udptq.ifq_head = NULL;
281 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
282 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
283 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
284 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
285 	ifs->ifs_nat_icmptq.ifq_ref = 1;
286 	ifs->ifs_nat_icmptq.ifq_head = NULL;
287 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
288 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
289 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
290 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
291 	ifs->ifs_nat_iptq.ifq_ref = 1;
292 	ifs->ifs_nat_iptq.ifq_head = NULL;
293 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
294 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
295 	ifs->ifs_nat_iptq.ifq_next = NULL;
296 
297 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
298 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
299 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
300 #ifdef LARGE_NAT
301 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
302 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
303 #endif
304 	}
305 
306 	/*
307 	 * Increase this because we may have "keep state" following
308 	 * this too and packet storms can occur if this is removed
309 	 * too quickly.
310 	 */
311 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
312 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
313 
314 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
315 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
316 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
317 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
318 
319 	ifs->ifs_fr_nat_init = 1;
320 
321 	return 0;
322 }
323 
324 
325 /* ------------------------------------------------------------------------ */
326 /* Function:    nat_addrdr                                                  */
327 /* Returns:     Nil                                                         */
328 /* Parameters:  n(I) - pointer to NAT rule to add                           */
329 /*                                                                          */
330 /* Adds a redirect rule to the hash table of redirect rules and the list of */
331 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
332 /* use by redirect rules.                                                   */
333 /* ------------------------------------------------------------------------ */
334 static void nat_addrdr(n, ifs)
335 ipnat_t *n;
336 ipf_stack_t *ifs;
337 {
338 	ipnat_t **np;
339 	u_32_t j;
340 	u_int hv;
341 	int k;
342 
343 	k = count4bits(n->in_outmsk);
344 	if ((k >= 0) && (k != 32))
345 		ifs->ifs_rdr_masks |= 1 << k;
346 	j = (n->in_outip & n->in_outmsk);
347 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
348 	np = ifs->ifs_rdr_rules + hv;
349 	while (*np != NULL)
350 		np = &(*np)->in_rnext;
351 	n->in_rnext = NULL;
352 	n->in_prnext = np;
353 	n->in_hv = hv;
354 	*np = n;
355 }
356 
357 
358 /* ------------------------------------------------------------------------ */
359 /* Function:    nat_addnat                                                  */
360 /* Returns:     Nil                                                         */
361 /* Parameters:  n(I) - pointer to NAT rule to add                           */
362 /*                                                                          */
363 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
364 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
365 /* redirect rules.                                                          */
366 /* ------------------------------------------------------------------------ */
367 static void nat_addnat(n, ifs)
368 ipnat_t *n;
369 ipf_stack_t *ifs;
370 {
371 	ipnat_t **np;
372 	u_32_t j;
373 	u_int hv;
374 	int k;
375 
376 	k = count4bits(n->in_inmsk);
377 	if ((k >= 0) && (k != 32))
378 		ifs->ifs_nat_masks |= 1 << k;
379 	j = (n->in_inip & n->in_inmsk);
380 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
381 	np = ifs->ifs_nat_rules + hv;
382 	while (*np != NULL)
383 		np = &(*np)->in_mnext;
384 	n->in_mnext = NULL;
385 	n->in_pmnext = np;
386 	n->in_hv = hv;
387 	*np = n;
388 }
389 
390 
391 /* ------------------------------------------------------------------------ */
392 /* Function:    nat_delrdr                                                  */
393 /* Returns:     Nil                                                         */
394 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
395 /*                                                                          */
396 /* Removes a redirect rule from the hash table of redirect rules.           */
397 /* ------------------------------------------------------------------------ */
398 static void nat_delrdr(n)
399 ipnat_t *n;
400 {
401 	if (n->in_rnext)
402 		n->in_rnext->in_prnext = n->in_prnext;
403 	*n->in_prnext = n->in_rnext;
404 }
405 
406 
407 /* ------------------------------------------------------------------------ */
408 /* Function:    nat_delnat                                                  */
409 /* Returns:     Nil                                                         */
410 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
411 /*                                                                          */
412 /* Removes a NAT map rule from the hash table of NAT map rules.             */
413 /* ------------------------------------------------------------------------ */
414 static void nat_delnat(n)
415 ipnat_t *n;
416 {
417 	if (n->in_mnext != NULL)
418 		n->in_mnext->in_pmnext = n->in_pmnext;
419 	*n->in_pmnext = n->in_mnext;
420 }
421 
422 
423 /* ------------------------------------------------------------------------ */
424 /* Function:    nat_hostmap                                                 */
425 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
426 /*                                else a pointer to the hostmapping to use  */
427 /* Parameters:  np(I)   - pointer to NAT rule                               */
428 /*              real(I) - real IP address                                   */
429 /*              map(I)  - mapped IP address                                 */
430 /*              port(I) - destination port number                           */
431 /* Write Locks: ipf_nat                                                     */
432 /*                                                                          */
433 /* Check if an ip address has already been allocated for a given mapping    */
434 /* that is not doing port based translation.  If is not yet allocated, then */
435 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
436 /* ------------------------------------------------------------------------ */
437 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
438 ipnat_t *np;
439 struct in_addr src;
440 struct in_addr dst;
441 struct in_addr map;
442 u_32_t port;
443 ipf_stack_t *ifs;
444 {
445 	hostmap_t *hm;
446 	u_int hv;
447 
448 	hv = (src.s_addr ^ dst.s_addr);
449 	hv += src.s_addr;
450 	hv += dst.s_addr;
451 	hv %= HOSTMAP_SIZE;
452 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
453 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
454 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
455 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
456 		    ((port == 0) || (port == hm->hm_port))) {
457 			hm->hm_ref++;
458 			return hm;
459 		}
460 
461 	if (np == NULL)
462 		return NULL;
463 
464 	KMALLOC(hm, hostmap_t *);
465 	if (hm) {
466 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
467 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
468 		if (ifs->ifs_ipf_hm_maplist != NULL)
469 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
470 		ifs->ifs_ipf_hm_maplist = hm;
471 
472 		hm->hm_next = ifs->ifs_maptable[hv];
473 		hm->hm_pnext = ifs->ifs_maptable + hv;
474 		if (ifs->ifs_maptable[hv] != NULL)
475 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
476 		ifs->ifs_maptable[hv] = hm;
477 		hm->hm_ipnat = np;
478 		hm->hm_srcip = src;
479 		hm->hm_dstip = dst;
480 		hm->hm_mapip = map;
481 		hm->hm_ref = 1;
482 		hm->hm_port = port;
483 	}
484 	return hm;
485 }
486 
487 
488 /* ------------------------------------------------------------------------ */
489 /* Function:    fr_hostmapdel                                              */
490 /* Returns:     Nil                                                         */
491 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
492 /* Write Locks: ipf_nat                                                     */
493 /*                                                                          */
494 /* Decrement the references to this hostmap structure by one.  If this      */
495 /* reaches zero then remove it and free it.                                 */
496 /* ------------------------------------------------------------------------ */
497 void fr_hostmapdel(hmp)
498 struct hostmap **hmp;
499 {
500 	struct hostmap *hm;
501 
502 	hm = *hmp;
503 	*hmp = NULL;
504 
505 	hm->hm_ref--;
506 	if (hm->hm_ref == 0) {
507 		if (hm->hm_next)
508 			hm->hm_next->hm_pnext = hm->hm_pnext;
509 		*hm->hm_pnext = hm->hm_next;
510 		if (hm->hm_hnext)
511 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
512 		*hm->hm_phnext = hm->hm_hnext;
513 		KFREE(hm);
514 	}
515 }
516 
517 
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going out: "n" is added to */
/* the complement of the stored checksum, the carries are folded back in    */
/* twice and the complemented result is written back in network order.      */
/* Nothing is touched when "n" is zero.                                     */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	int pass;

	if (n != 0) {
		acc = (u_32_t)(u_short)~ntohs(*sp);
		acc += n;
		/* Two folds are enough to absorb every carry. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);
		*sp = htons((u_short)~acc);
	}
}
544 
545 
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going in: the low 16 bits  */
/* of the complement of "n" are added to the complement of the stored       */
/* checksum, the carries are folded back in twice and the complemented      */
/* result is written back in network order.  Nothing is touched when "n"    */
/* is zero.                                                                 */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	int pass;

	if (n != 0) {
		acc = (u_32_t)(u_short)~ntohs(*sp);
		acc += (u_32_t)(u_short)~n;
		/* Two folds are enough to absorb every carry. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);
		*sp = htons((u_short)~acc);
	}
}
572 
573 
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(I)  - location of 16bit checksum to update               */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
/* data section of an IP packet.                                            */
/*                                                                          */
/* The only situation in which you need to do this is when NAT'ing an       */
/* ICMP error message.  Such a message contains in its body the IP header   */
/* of the original IP packet that caused the error.                         */
/*                                                                          */
/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
/* kernel the data section of the ICMP error is just data, and no special   */
/* processing like hardware cksum or ntohs processing has been done by the  */
/* kernel on the data section.                                              */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	int pass;

	if (n != 0) {
		acc = (u_32_t)(u_short)~ntohs(*sp);
		acc += n;
		/* Two folds are enough to absorb every carry. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);
		*sp = htons((u_short)~acc);
	}
}
610 
611 
612 /* ------------------------------------------------------------------------ */
613 /* Function:    fr_nat_ioctl                                                */
614 /* Returns:     int - 0 == success, != 0 == failure                         */
615 /* Parameters:  data(I) - pointer to ioctl data                             */
616 /*              cmd(I)  - ioctl command integer                             */
617 /*              mode(I) - file mode bits used with open                     */
618 /*                                                                          */
619 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
620 /* ------------------------------------------------------------------------ */
621 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
622 ioctlcmd_t cmd;
623 caddr_t data;
624 int mode, uid;
625 void *ctx;
626 ipf_stack_t *ifs;
627 {
628 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
629 	int error = 0, ret, arg, getlock;
630 	ipnat_t natd;
631 
632 #if (BSD >= 199306) && defined(_KERNEL)
633 	if ((securelevel >= 2) && (mode & FWRITE))
634 		return EPERM;
635 #endif
636 
637 #if defined(__osf__) && defined(_KERNEL)
638 	getlock = 0;
639 #else
640 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
641 #endif
642 
643 	nat = NULL;     /* XXX gcc -Wuninitialized */
644 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
645 		KMALLOC(nt, ipnat_t *);
646 	} else {
647 		nt = NULL;
648 	}
649 
650 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
651 		if (mode & NAT_SYSSPACE) {
652 			bcopy(data, (char *)&natd, sizeof(natd));
653 			error = 0;
654 		} else {
655 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
656 		}
657 
658 	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
659 		BCOPYIN(data, &arg, sizeof(arg));
660 	}
661 
662 	if (error != 0)
663 		goto done;
664 
665 	/*
666 	 * For add/delete, look to see if the NAT entry is already present
667 	 */
668 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
669 		nat = &natd;
670 		if (nat->in_v == 0)	/* For backward compat. */
671 			nat->in_v = 4;
672 		nat->in_flags &= IPN_USERFLAGS;
673 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
674 			if ((nat->in_flags & IPN_SPLIT) == 0)
675 				nat->in_inip &= nat->in_inmsk;
676 			if ((nat->in_flags & IPN_IPRANGE) == 0)
677 				nat->in_outip &= nat->in_outmsk;
678 		}
679 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
680 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
681 		     np = &n->in_next)
682 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
683 			    IPN_CMPSIZ) == 0) {
684 				if (nat->in_redir == NAT_REDIRECT &&
685 				    nat->in_pnext != n->in_pnext)
686 					continue;
687 				break;
688 			}
689 	}
690 
691 	switch (cmd)
692 	{
693 	case SIOCGENITER :
694 	    {
695 		ipfgeniter_t iter;
696 		ipftoken_t *token;
697 
698 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
699 		if (error != 0)
700 			break;
701 
702 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
703 		if (token != NULL)
704 			error  = nat_iterator(token, &iter, ifs);
705 		else
706 			error = ESRCH;
707 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
708 		break;
709 	    }
710 #ifdef  IPFILTER_LOG
711 	case SIOCIPFFB :
712 	{
713 		int tmp;
714 
715 		if (!(mode & FWRITE))
716 			error = EPERM;
717 		else {
718 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
719 			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
720 		}
721 		break;
722 	}
723 	case SIOCSETLG :
724 		if (!(mode & FWRITE))
725 			error = EPERM;
726 		else {
727 			BCOPYIN((char *)data,
728 				       (char *)&ifs->ifs_nat_logging,
729 				sizeof(ifs->ifs_nat_logging));
730 		}
731 		break;
732 	case SIOCGETLG :
733 		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
734 			sizeof(ifs->ifs_nat_logging));
735 		break;
736 	case FIONREAD :
737 		arg = ifs->ifs_iplused[IPL_LOGNAT];
738 		BCOPYOUT(&arg, data, sizeof(arg));
739 		break;
740 #endif
741 	case SIOCADNAT :
742 		if (!(mode & FWRITE)) {
743 			error = EPERM;
744 		} else if (n != NULL) {
745 			error = EEXIST;
746 		} else if (nt == NULL) {
747 			error = ENOMEM;
748 		}
749 		if (error != 0) {
750 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
751 			break;
752 		}
753 		bcopy((char *)nat, (char *)nt, sizeof(*n));
754 		error = nat_siocaddnat(nt, np, getlock, ifs);
755 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
756 		if (error == 0)
757 			nt = NULL;
758 		break;
759 	case SIOCRMNAT :
760 		if (!(mode & FWRITE)) {
761 			error = EPERM;
762 			n = NULL;
763 		} else if (n == NULL) {
764 			error = ESRCH;
765 		}
766 
767 		if (error != 0) {
768 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
769 			break;
770 		}
771 		nat_siocdelnat(n, np, getlock, ifs);
772 
773 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
774 		n = NULL;
775 		break;
776 	case SIOCGNATS :
777 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
778 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
779 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
780 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
781 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
782 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
783 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
784 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
785 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
786 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
787 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
788 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
789 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
790 		break;
791 	case SIOCGNATL :
792 	    {
793 		natlookup_t nl;
794 
795 		if (getlock) {
796 			READ_ENTER(&ifs->ifs_ipf_nat);
797 		}
798 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
799 		if (error == 0) {
800 			if (nat_lookupredir(&nl, ifs) != NULL) {
801 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
802 			} else {
803 				error = ESRCH;
804 			}
805 		}
806 		if (getlock) {
807 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
808 		}
809 		break;
810 	    }
811 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
812 		if (!(mode & FWRITE)) {
813 			error = EPERM;
814 			break;
815 		}
816 		if (getlock) {
817 			WRITE_ENTER(&ifs->ifs_ipf_nat);
818 		}
819 		error = 0;
820 		if (arg == 0)
821 			ret = nat_flushtable(ifs);
822 		else if (arg == 1)
823 			ret = nat_clearlist(ifs);
824 		else if (arg >= 2 && arg <= 4)
825 			ret = nat_extraflush(arg - 2, ifs);
826 		else
827 			error = EINVAL;
828 		if (getlock) {
829 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
830 		}
831 		if (error == 0) {
832 			BCOPYOUT(&ret, data, sizeof(ret));
833 		}
834 		break;
835 	case SIOCPROXY :
836 		error = appr_ioctl(data, cmd, mode, ifs);
837 		break;
838 	case SIOCSTLCK :
839 		if (!(mode & FWRITE)) {
840 			error = EPERM;
841 		} else {
842 			fr_lock(data, &ifs->ifs_fr_nat_lock);
843 		}
844 		break;
845 	case SIOCSTPUT :
846 		if ((mode & FWRITE) != 0) {
847 			error = fr_natputent(data, getlock, ifs);
848 		} else {
849 			error = EACCES;
850 		}
851 		break;
852 	case SIOCSTGSZ :
853 		if (ifs->ifs_fr_nat_lock) {
854 			if (getlock) {
855 				READ_ENTER(&ifs->ifs_ipf_nat);
856 			}
857 			error = fr_natgetsz(data, ifs);
858 			if (getlock) {
859 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
860 			}
861 		} else
862 			error = EACCES;
863 		break;
864 	case SIOCSTGET :
865 		if (ifs->ifs_fr_nat_lock) {
866 			if (getlock) {
867 				READ_ENTER(&ifs->ifs_ipf_nat);
868 			}
869 			error = fr_natgetent(data, ifs);
870 			if (getlock) {
871 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
872 			}
873 		} else
874 			error = EACCES;
875 		break;
876 	case SIOCIPFDELTOK :
877 		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
878 		error = ipf_deltoken(arg, uid, ctx, ifs);
879 		break;
880 	default :
881 		error = EINVAL;
882 		break;
883 	}
884 done:
885 	if (nt)
886 		KFREE(nt);
887 	return error;
888 }
889 
890 
891 /* ------------------------------------------------------------------------ */
892 /* Function:    nat_siocaddnat                                              */
893 /* Returns:     int - 0 == success, != 0 == failure                         */
894 /* Parameters:  n(I)       - pointer to new NAT rule                        */
895 /*              np(I)      - pointer to where to insert new NAT rule        */
896 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
897 /* Mutex Locks: ipf_natio                                                   */
898 /*                                                                          */
899 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
900 /* from information passed to the kernel, then add it  to the appropriate   */
901 /* NAT rule table(s).                                                       */
902 /* ------------------------------------------------------------------------ */
903 static int nat_siocaddnat(n, np, getlock, ifs)
904 ipnat_t *n, **np;
905 int getlock;
906 ipf_stack_t *ifs;
907 {
908 	int error = 0, i, j;
909 
910 	if (nat_resolverule(n, ifs) != 0)
911 		return ENOENT;
912 
913 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
914 		return EINVAL;
915 
916 	n->in_use = 0;
917 	if (n->in_redir & NAT_MAPBLK)
918 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
919 	else if (n->in_flags & IPN_AUTOPORTMAP)
920 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
921 	else if (n->in_flags & IPN_IPRANGE)
922 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
923 	else if (n->in_flags & IPN_SPLIT)
924 		n->in_space = 2;
925 	else if (n->in_outmsk != 0)
926 		n->in_space = ~ntohl(n->in_outmsk);
927 	else
928 		n->in_space = 1;
929 
930 	/*
931 	 * Calculate the number of valid IP addresses in the output
932 	 * mapping range.  In all cases, the range is inclusive of
933 	 * the start and ending IP addresses.
934 	 * If to a CIDR address, lose 2: broadcast + network address
935 	 *                               (so subtract 1)
936 	 * If to a range, add one.
937 	 * If to a single IP address, set to 1.
938 	 */
939 	if (n->in_space) {
940 		if ((n->in_flags & IPN_IPRANGE) != 0)
941 			n->in_space += 1;
942 		else
943 			n->in_space -= 1;
944 	} else
945 		n->in_space = 1;
946 
947 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
948 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
949 		n->in_nip = ntohl(n->in_outip) + 1;
950 	else if ((n->in_flags & IPN_SPLIT) &&
951 		 (n->in_redir & NAT_REDIRECT))
952 		n->in_nip = ntohl(n->in_inip);
953 	else
954 		n->in_nip = ntohl(n->in_outip);
955 	if (n->in_redir & NAT_MAP) {
956 		n->in_pnext = ntohs(n->in_pmin);
957 		/*
958 		 * Multiply by the number of ports made available.
959 		 */
960 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
961 			n->in_space *= (ntohs(n->in_pmax) -
962 					ntohs(n->in_pmin) + 1);
963 			/*
964 			 * Because two different sources can map to
965 			 * different destinations but use the same
966 			 * local IP#/port #.
967 			 * If the result is smaller than in_space, then
968 			 * we may have wrapped around 32bits.
969 			 */
970 			i = n->in_inmsk;
971 			if ((i != 0) && (i != 0xffffffff)) {
972 				j = n->in_space * (~ntohl(i) + 1);
973 				if (j >= n->in_space)
974 					n->in_space = j;
975 				else
976 					n->in_space = 0xffffffff;
977 			}
978 		}
979 		/*
980 		 * If no protocol is specified, multiple by 256 to allow for
981 		 * at least one IP:IP mapping per protocol.
982 		 */
983 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
984 				j = n->in_space * 256;
985 				if (j >= n->in_space)
986 					n->in_space = j;
987 				else
988 					n->in_space = 0xffffffff;
989 		}
990 	}
991 
992 	/* Otherwise, these fields are preset */
993 
994 	if (getlock) {
995 		WRITE_ENTER(&ifs->ifs_ipf_nat);
996 	}
997 	n->in_next = NULL;
998 	*np = n;
999 
1000 	if (n->in_age[0] != 0)
1001 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1002 						  n->in_age[0], ifs);
1003 
1004 	if (n->in_age[1] != 0)
1005 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1006 						  n->in_age[1], ifs);
1007 
1008 	if (n->in_redir & NAT_REDIRECT) {
1009 		n->in_flags &= ~IPN_NOTDST;
1010 		nat_addrdr(n, ifs);
1011 	}
1012 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1013 		n->in_flags &= ~IPN_NOTSRC;
1014 		nat_addnat(n, ifs);
1015 	}
1016 	n = NULL;
1017 	ifs->ifs_nat_stats.ns_rules++;
1018 	if (getlock) {
1019 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1020 	}
1021 
1022 	return error;
1023 }
1024 
1025 
1026 /* ------------------------------------------------------------------------ */
1027 /* Function:    nat_resolvrule                                              */
1028 /* Returns:     int - 0 == success, -1 == failure                           */
1029 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1030 /*                                                                          */
1031 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1032 /* any specified interfaces and proxy labels, and determines whether or not */
1033 /* all proxy labels are correctly specified.				    */
1034 /*									    */
1035 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1036 /* ------------------------------------------------------------------------ */
1037 static int nat_resolverule(n, ifs)
1038 ipnat_t *n;
1039 ipf_stack_t *ifs;
1040 {
1041 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1042 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs);
1043 
1044 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1045 	if (n->in_ifnames[1][0] == '\0') {
1046 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1047 		n->in_ifps[1] = n->in_ifps[0];
1048 	} else {
1049 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs);
1050 	}
1051 
1052 	if (n->in_plabel[0] != '\0') {
1053 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1054 		if (n->in_apr == NULL)
1055 			return -1;
1056 	}
1057 	return 0;
1058 }
1059 
1060 
1061 /* ------------------------------------------------------------------------ */
1062 /* Function:    nat_siocdelnat                                              */
1063 /* Returns:     int - 0 == success, != 0 == failure                         */
1064 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1065 /*              np(I)      - pointer to where to insert new NAT rule        */
1066 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1067 /* Mutex Locks: ipf_natio                                                   */
1068 /*                                                                          */
1069 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1070 /* from information passed to the kernel, then add it  to the appropriate   */
1071 /* NAT rule table(s).                                                       */
1072 /* ------------------------------------------------------------------------ */
1073 static void nat_siocdelnat(n, np, getlock, ifs)
1074 ipnat_t *n, **np;
1075 int getlock;
1076 ipf_stack_t *ifs;
1077 {
1078 	if (getlock) {
1079 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1080 	}
1081 	if (n->in_redir & NAT_REDIRECT)
1082 		nat_delrdr(n);
1083 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1084 		nat_delnat(n);
1085 	if (ifs->ifs_nat_list == NULL) {
1086 		ifs->ifs_nat_masks = 0;
1087 		ifs->ifs_rdr_masks = 0;
1088 	}
1089 
1090 	if (n->in_tqehead[0] != NULL) {
1091 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1092 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1093 		}
1094 	}
1095 
1096 	if (n->in_tqehead[1] != NULL) {
1097 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1098 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1099 		}
1100 	}
1101 
1102 	*np = n->in_next;
1103 
1104 	if (n->in_use == 0) {
1105 		if (n->in_apr)
1106 			appr_free(n->in_apr);
1107 		KFREE(n);
1108 		ifs->ifs_nat_stats.ns_rules--;
1109 	} else {
1110 		n->in_flags |= IPN_DELETE;
1111 		n->in_next = NULL;
1112 	}
1113 	if (getlock) {
1114 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1115 	}
1116 }
1117 
1118 
1119 /* ------------------------------------------------------------------------ */
1120 /* Function:    fr_natgetsz                                                 */
1121 /* Returns:     int - 0 == success, != 0 is the error value.                */
1122 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1123 /*                        get the size of.                                  */
1124 /*                                                                          */
1125 /* Handle SIOCSTGSZ.                                                        */
1126 /* Return the size of the nat list entry to be copied back to user space.   */
1127 /* The size of the entry is stored in the ng_sz field and the enture natget */
1128 /* structure is copied back to the user.                                    */
1129 /* ------------------------------------------------------------------------ */
1130 static int fr_natgetsz(data, ifs)
1131 caddr_t data;
1132 ipf_stack_t *ifs;
1133 {
1134 	ap_session_t *aps;
1135 	nat_t *nat, *n;
1136 	natget_t ng;
1137 
1138 	BCOPYIN(data, &ng, sizeof(ng));
1139 
1140 	nat = ng.ng_ptr;
1141 	if (!nat) {
1142 		nat = ifs->ifs_nat_instances;
1143 		ng.ng_sz = 0;
1144 		/*
1145 		 * Empty list so the size returned is 0.  Simple.
1146 		 */
1147 		if (nat == NULL) {
1148 			BCOPYOUT(&ng, data, sizeof(ng));
1149 			return 0;
1150 		}
1151 	} else {
1152 		/*
1153 		 * Make sure the pointer we're copying from exists in the
1154 		 * current list of entries.  Security precaution to prevent
1155 		 * copying of random kernel data.
1156 		 */
1157 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1158 			if (n == nat)
1159 				break;
1160 		if (!n)
1161 			return ESRCH;
1162 	}
1163 
1164 	/*
1165 	 * Incluse any space required for proxy data structures.
1166 	 */
1167 	ng.ng_sz = sizeof(nat_save_t);
1168 	aps = nat->nat_aps;
1169 	if (aps != NULL) {
1170 		ng.ng_sz += sizeof(ap_session_t) - 4;
1171 		if (aps->aps_data != 0)
1172 			ng.ng_sz += aps->aps_psiz;
1173 	}
1174 
1175 	BCOPYOUT(&ng, data, sizeof(ng));
1176 	return 0;
1177 }
1178 
1179 
1180 /* ------------------------------------------------------------------------ */
1181 /* Function:    fr_natgetent                                                */
1182 /* Returns:     int - 0 == success, != 0 is the error value.                */
1183 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1184 /*                        to NAT structure to copy out.                     */
1185 /*                                                                          */
1186 /* Handle SIOCSTGET.                                                        */
1187 /* Copies out NAT entry to user space.  Any additional data held for a      */
1188 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1189 /* ------------------------------------------------------------------------ */
1190 static int fr_natgetent(data, ifs)
1191 caddr_t data;
1192 ipf_stack_t *ifs;
1193 {
1194 	int error, outsize;
1195 	ap_session_t *aps;
1196 	nat_save_t *ipn, ipns;
1197 	nat_t *n, *nat;
1198 
1199 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1200 	if (error != 0)
1201 		return error;
1202 
1203 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1204 		return EINVAL;
1205 
1206 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1207 	if (ipn == NULL)
1208 		return ENOMEM;
1209 
1210 	ipn->ipn_dsize = ipns.ipn_dsize;
1211 	nat = ipns.ipn_next;
1212 	if (nat == NULL) {
1213 		nat = ifs->ifs_nat_instances;
1214 		if (nat == NULL) {
1215 			if (ifs->ifs_nat_instances == NULL)
1216 				error = ENOENT;
1217 			goto finished;
1218 		}
1219 	} else {
1220 		/*
1221 		 * Make sure the pointer we're copying from exists in the
1222 		 * current list of entries.  Security precaution to prevent
1223 		 * copying of random kernel data.
1224 		 */
1225 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1226 			if (n == nat)
1227 				break;
1228 		if (n == NULL) {
1229 			error = ESRCH;
1230 			goto finished;
1231 		}
1232 	}
1233 	ipn->ipn_next = nat->nat_next;
1234 
1235 	/*
1236 	 * Copy the NAT structure.
1237 	 */
1238 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1239 
1240 	/*
1241 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1242 	 */
1243 	if (nat->nat_ptr != NULL)
1244 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1245 		      sizeof(ipn->ipn_ipnat));
1246 
1247 	/*
1248 	 * If we also know the NAT entry has an associated filter rule,
1249 	 * save that too.
1250 	 */
1251 	if (nat->nat_fr != NULL)
1252 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1253 		      sizeof(ipn->ipn_fr));
1254 
1255 	/*
1256 	 * Last but not least, if there is an application proxy session set
1257 	 * up for this NAT entry, then copy that out too, including any
1258 	 * private data saved along side it by the proxy.
1259 	 */
1260 	aps = nat->nat_aps;
1261 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1262 	if (aps != NULL) {
1263 		char *s;
1264 
1265 		if (outsize < sizeof(*aps)) {
1266 			error = ENOBUFS;
1267 			goto finished;
1268 		}
1269 
1270 		s = ipn->ipn_data;
1271 		bcopy((char *)aps, s, sizeof(*aps));
1272 		s += sizeof(*aps);
1273 		outsize -= sizeof(*aps);
1274 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1275 			bcopy(aps->aps_data, s, aps->aps_psiz);
1276 		else
1277 			error = ENOBUFS;
1278 	}
1279 	if (error == 0) {
1280 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1281 	}
1282 
1283 finished:
1284 	if (ipn != NULL) {
1285 		KFREES(ipn, ipns.ipn_dsize);
1286 	}
1287 	return error;
1288 }
1289 
1290 /* ------------------------------------------------------------------------ */
1291 /* Function:    nat_calc_chksum_diffs					    */
1292 /* Returns:     void							    */
1293 /* Parameters:  nat	-	pointer to NAT table entry		    */
1294 /*                                                                          */
1295 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1296 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1297 /* we are dealing with partial chksum offload. For these cases we need to   */
1298 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1299 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1300 /* nat_sumd[0]. 							    */
1301 /*									    */
1302 /* The function accepts initialized NAT table entry and computes the deltas */
1303 /* from nat_inip/nat_outip members. The function is called right before	    */
1304 /* the new entry is inserted into the table.				    */
1305 /*									    */
1306 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1307 /* of delta between original and new IP addresses.			    */
1308 /*									    */
1309 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1310 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1311 /*									    */
1312 /* Some facts about chksum, we should remember:				    */
1313 /*	IP header chksum covers IP header only				    */
1314 /*									    */
1315 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1316 /*		SRC, DST IP address					    */
1317 /*		SRC, DST Port						    */
1318 /*		length of payload					    */
1319 /*									    */
1320 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1321 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1322 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1323 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1324 /* stored along with other IP packet data in dblk_t structure and used in   */
1325 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1326 /*									    */
1327 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1328 /* of delta between new and orig address. NOTE: the order of operands for   */
1329 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1330 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1331 /*									    */
1332 /* ------------------------------------------------------------------------ */
1333 static void nat_calc_chksum_diffs(nat)
1334 nat_t *nat;
1335 {
1336 	u_32_t	sum_orig = 0;
1337 	u_32_t	sum_changed = 0;
1338 	u_32_t	sumd;
1339 	u_32_t	ipsum_orig = 0;
1340 	u_32_t	ipsum_changed = 0;
1341 
1342 	/*
1343 	 * the switch calculates operands for CALC_SUMD(),
1344 	 * which will compute the partial chksum delta.
1345 	 */
1346 	switch (nat->nat_dir)
1347 	{
1348 	case NAT_INBOUND:
1349 		/*
1350 		 * we are dealing with RDR rule (DST address gets
1351 		 * modified on packet from client)
1352 		 */
1353 		sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1354 		sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1355 		break;
1356 	case NAT_OUTBOUND:
1357 		/*
1358 		 * we are dealing with MAP rule (SRC address gets
1359 		 * modified on packet from client)
1360 		 */
1361 		sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1362 		sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1363 		break;
1364 	default: ;
1365 		break;
1366 	}
1367 
1368 	/*
1369 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1370 	 * calculation, which happens at the end of function.
1371 	 */
1372 	ipsum_changed = sum_changed;
1373 	ipsum_orig = sum_orig;
1374 	/*
1375 	 * NOTE: the order of operands for partial chksum adjustment
1376 	 * computation has to be swapped!
1377 	 */
1378 	CALC_SUMD(sum_changed, sum_orig, sumd);
1379 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1380 
1381 	if (nat->nat_p == IPPROTO_TCP || nat->nat_p == IPPROTO_UDP) {
1382 
1383 		/*
1384 		 * switch calculates operands for CALC_SUMD(), which will
1385 		 * compute the full chksum delta.
1386 		 */
1387 		switch (nat->nat_dir)
1388 		{
1389 		case NAT_INBOUND:
1390 			sum_changed = LONG_SUM(
1391 					ntohl(nat->nat_inip.s_addr) +
1392 					ntohs(nat->nat_inport)
1393 				    );
1394 			sum_orig = LONG_SUM(
1395 					ntohl(nat->nat_outip.s_addr) +
1396 					ntohs(nat->nat_outport)
1397 				    );
1398 			break;
1399 		case NAT_OUTBOUND:
1400 			sum_changed = LONG_SUM(
1401 					ntohl(nat->nat_outip.s_addr) +
1402 					ntohs(nat->nat_outport)
1403 				);
1404 			sum_orig = LONG_SUM(
1405 					ntohl(nat->nat_inip.s_addr) +
1406 					ntohs(nat->nat_inport)
1407 				);
1408 			break;
1409 		default: ;
1410 			break;
1411 		}
1412 
1413 		CALC_SUMD(sum_orig, sum_changed, sumd);
1414 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1415 	}
1416 	else
1417 		nat->nat_sumd[0] = nat->nat_sumd[1];
1418 
1419 	/*
1420 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1421 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1422 	 */
1423 	if (NAT_HAS_L4_CHANGED(nat)) {
1424 		/*
1425 		 * bad luck, NAT changes also the L4 header, use IP addresses
1426 		 * to compute chksum adjustment for IP header.
1427 		 */
1428 		CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1429 		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1430 	}
1431 	else {
1432 		/*
1433 		 * the NAT does not change L4 hdr -> reuse chksum adjustment
1434 		 * for IP hdr.
1435 		 */
1436 		nat->nat_ipsumd = nat->nat_sumd[0];
1437 	}
1438 
1439 	return;
1440 }
1441 
1442 /* ------------------------------------------------------------------------ */
1443 /* Function:    fr_natputent                                                */
1444 /* Returns:     int - 0 == success, != 0 is the error value.                */
1445 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1446 /*                            structure information to load into the kernel */
1447 /*              getlock(I) - flag indicating whether or not a write lock    */
1448 /*                           on ipf_nat is already held.                    */
1449 /*                                                                          */
1450 /* Handle SIOCSTPUT.                                                        */
1451 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1452 /* firewall rule data structures, if pointers to them indicate so.          */
1453 /* ------------------------------------------------------------------------ */
1454 static int fr_natputent(data, getlock, ifs)
1455 caddr_t data;
1456 int getlock;
1457 ipf_stack_t *ifs;
1458 {
1459 	nat_save_t ipn, *ipnn;
1460 	ap_session_t *aps;
1461 	nat_t *n, *nat;
1462 	frentry_t *fr;
1463 	fr_info_t fin;
1464 	ipnat_t *in;
1465 	int error;
1466 
1467 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1468 	if (error != 0)
1469 		return error;
1470 
1471 	/*
1472 	 * Trigger automatic call to nat_extraflush() if the
1473 	 * table has reached capcity specified by hi watermark.
1474 	 */
1475 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1476 		ifs->ifs_nat_doflush = 1;
1477 
1478 	/*
1479 	 * Initialise early because of code at junkput label.
1480 	 */
1481 	in = NULL;
1482 	aps = NULL;
1483 	nat = NULL;
1484 	ipnn = NULL;
1485 
1486 	/*
1487 	 * New entry, copy in the rest of the NAT entry if it's size is more
1488 	 * than just the nat_t structure.
1489 	 */
1490 	fr = NULL;
1491 	if (ipn.ipn_dsize > sizeof(ipn)) {
1492 		if (ipn.ipn_dsize > 81920) {
1493 			error = ENOMEM;
1494 			goto junkput;
1495 		}
1496 
1497 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1498 		if (ipnn == NULL)
1499 			return ENOMEM;
1500 
1501 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1502 		if (error != 0) {
1503 			error = EFAULT;
1504 			goto junkput;
1505 		}
1506 	} else
1507 		ipnn = &ipn;
1508 
1509 	KMALLOC(nat, nat_t *);
1510 	if (nat == NULL) {
1511 		error = ENOMEM;
1512 		goto junkput;
1513 	}
1514 
1515 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1516 	/*
1517 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1518 	 */
1519 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1520 	nat->nat_tqe.tqe_pnext = NULL;
1521 	nat->nat_tqe.tqe_next = NULL;
1522 	nat->nat_tqe.tqe_ifq = NULL;
1523 	nat->nat_tqe.tqe_parent = nat;
1524 
1525 	/*
1526 	 * Restore the rule associated with this nat session
1527 	 */
1528 	in = ipnn->ipn_nat.nat_ptr;
1529 	if (in != NULL) {
1530 		KMALLOC(in, ipnat_t *);
1531 		nat->nat_ptr = in;
1532 		if (in == NULL) {
1533 			error = ENOMEM;
1534 			goto junkput;
1535 		}
1536 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1537 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1538 		in->in_use = 1;
1539 		in->in_flags |= IPN_DELETE;
1540 
1541 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1542 
1543 		if (nat_resolverule(in, ifs) != 0) {
1544 			error = ESRCH;
1545 			goto junkput;
1546 		}
1547 	}
1548 
1549 	/*
1550 	 * Check that the NAT entry doesn't already exist in the kernel.
1551 	 */
1552 	bzero((char *)&fin, sizeof(fin));
1553 	fin.fin_p = nat->nat_p;
1554 	fin.fin_ifs = ifs;
1555 	if (nat->nat_dir == NAT_OUTBOUND) {
1556 		fin.fin_data[0] = ntohs(nat->nat_oport);
1557 		fin.fin_data[1] = ntohs(nat->nat_outport);
1558 		fin.fin_ifp = nat->nat_ifps[0];
1559 		if (getlock) {
1560 			READ_ENTER(&ifs->ifs_ipf_nat);
1561 		}
1562 		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1563 			nat->nat_oip, nat->nat_outip);
1564 		if (getlock) {
1565 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1566 		}
1567 		if (n != NULL) {
1568 			error = EEXIST;
1569 			goto junkput;
1570 		}
1571 	} else if (nat->nat_dir == NAT_INBOUND) {
1572 		fin.fin_data[0] = ntohs(nat->nat_inport);
1573 		fin.fin_data[1] = ntohs(nat->nat_oport);
1574 		fin.fin_ifp = nat->nat_ifps[1];
1575 		if (getlock) {
1576 			READ_ENTER(&ifs->ifs_ipf_nat);
1577 		}
1578 		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1579 			nat->nat_inip, nat->nat_oip);
1580 		if (getlock) {
1581 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1582 		}
1583 		if (n != NULL) {
1584 			error = EEXIST;
1585 			goto junkput;
1586 		}
1587 	} else {
1588 		error = EINVAL;
1589 		goto junkput;
1590 	}
1591 
1592 	/*
1593 	 * Restore ap_session_t structure.  Include the private data allocated
1594 	 * if it was there.
1595 	 */
1596 	aps = nat->nat_aps;
1597 	if (aps != NULL) {
1598 		KMALLOC(aps, ap_session_t *);
1599 		nat->nat_aps = aps;
1600 		if (aps == NULL) {
1601 			error = ENOMEM;
1602 			goto junkput;
1603 		}
1604 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1605 		if (in != NULL)
1606 			aps->aps_apr = in->in_apr;
1607 		else
1608 			aps->aps_apr = NULL;
1609 		if (aps->aps_psiz != 0) {
1610 			if (aps->aps_psiz > 81920) {
1611 				error = ENOMEM;
1612 				goto junkput;
1613 			}
1614 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1615 			if (aps->aps_data == NULL) {
1616 				error = ENOMEM;
1617 				goto junkput;
1618 			}
1619 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1620 			      aps->aps_psiz);
1621 		} else {
1622 			aps->aps_psiz = 0;
1623 			aps->aps_data = NULL;
1624 		}
1625 	}
1626 
1627 	/*
1628 	 * If there was a filtering rule associated with this entry then
1629 	 * build up a new one.
1630 	 */
1631 	fr = nat->nat_fr;
1632 	if (fr != NULL) {
1633 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1634 			KMALLOC(fr, frentry_t *);
1635 			nat->nat_fr = fr;
1636 			if (fr == NULL) {
1637 				error = ENOMEM;
1638 				goto junkput;
1639 			}
1640 			ipnn->ipn_nat.nat_fr = fr;
1641 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1642 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1643 
1644 			fr->fr_ref = 1;
1645 			fr->fr_dsize = 0;
1646 			fr->fr_data = NULL;
1647 			fr->fr_type = FR_T_NONE;
1648 
1649 			MUTEX_NUKE(&fr->fr_lock);
1650 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1651 		} else {
1652 			if (getlock) {
1653 				READ_ENTER(&ifs->ifs_ipf_nat);
1654 			}
1655 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1656 				if (n->nat_fr == fr)
1657 					break;
1658 
1659 			if (n != NULL) {
1660 				MUTEX_ENTER(&fr->fr_lock);
1661 				fr->fr_ref++;
1662 				MUTEX_EXIT(&fr->fr_lock);
1663 			}
1664 			if (getlock) {
1665 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1666 			}
1667 			if (!n) {
1668 				error = ESRCH;
1669 				goto junkput;
1670 			}
1671 		}
1672 	}
1673 
1674 	if (ipnn != &ipn) {
1675 		KFREES(ipnn, ipn.ipn_dsize);
1676 		ipnn = NULL;
1677 	}
1678 
1679 	nat_calc_chksum_diffs(nat);
1680 
1681 	if (getlock) {
1682 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1683 	}
1684 	error = nat_insert(nat, nat->nat_rev, ifs);
1685 	if ((error == 0) && (aps != NULL)) {
1686 		aps->aps_next = ifs->ifs_ap_sess_list;
1687 		ifs->ifs_ap_sess_list = aps;
1688 	}
1689 	if (getlock) {
1690 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1691 	}
1692 
1693 	if (error == 0)
1694 		return 0;
1695 
1696 	error = ENOMEM;
1697 
1698 junkput:
1699 	if (fr != NULL)
1700 		(void) fr_derefrule(&fr, ifs);
1701 
1702 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1703 		KFREES(ipnn, ipn.ipn_dsize);
1704 	}
1705 	if (nat != NULL) {
1706 		if (aps != NULL) {
1707 			if (aps->aps_data != NULL) {
1708 				KFREES(aps->aps_data, aps->aps_psiz);
1709 			}
1710 			KFREE(aps);
1711 		}
1712 		if (in != NULL) {
1713 			if (in->in_apr)
1714 				appr_free(in->in_apr);
1715 			KFREE(in);
1716 		}
1717 		KFREE(nat);
1718 	}
1719 	return error;
1720 }
1721 
1722 
1723 /* ------------------------------------------------------------------------ */
1724 /* Function:    nat_delete                                                  */
1725 /* Returns:     Nil                                                         */
1726 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1727 /*              logtype(I) - type of LOG record to create before deleting   */
1728 /* Write Lock:  ipf_nat                                                     */
1729 /*                                                                          */
1730 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1731 /* enabled then generate a NAT log record for this event.                   */
1732 /* ------------------------------------------------------------------------ */
1733 static void nat_delete(nat, logtype, ifs)
1734 struct nat *nat;
1735 int logtype;
1736 ipf_stack_t *ifs;
1737 {
1738 	struct ipnat *ipn;
1739 
1740 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1741 		nat_log(nat, logtype, ifs);
1742 
1743 	/*
1744 	 * Take it as a general indication that all the pointers are set if
1745 	 * nat_pnext is set.
1746 	 */
1747 	if (nat->nat_pnext != NULL) {
1748 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1749 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1750 
1751 		*nat->nat_pnext = nat->nat_next;
1752 		if (nat->nat_next != NULL) {
1753 			nat->nat_next->nat_pnext = nat->nat_pnext;
1754 			nat->nat_next = NULL;
1755 		}
1756 		nat->nat_pnext = NULL;
1757 
1758 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1759 		if (nat->nat_hnext[0] != NULL) {
1760 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1761 			nat->nat_hnext[0] = NULL;
1762 		}
1763 		nat->nat_phnext[0] = NULL;
1764 
1765 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1766 		if (nat->nat_hnext[1] != NULL) {
1767 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1768 			nat->nat_hnext[1] = NULL;
1769 		}
1770 		nat->nat_phnext[1] = NULL;
1771 
1772 		if ((nat->nat_flags & SI_WILDP) != 0)
1773 			ifs->ifs_nat_stats.ns_wilds--;
1774 	}
1775 
1776 	if (nat->nat_me != NULL) {
1777 		*nat->nat_me = NULL;
1778 		nat->nat_me = NULL;
1779 	}
1780 
1781 	fr_deletequeueentry(&nat->nat_tqe);
1782 
1783 	MUTEX_ENTER(&nat->nat_lock);
1784 	if (nat->nat_ref > 1) {
1785 		nat->nat_ref--;
1786 		MUTEX_EXIT(&nat->nat_lock);
1787 		return;
1788 	}
1789 	MUTEX_EXIT(&nat->nat_lock);
1790 
1791 	/*
1792 	 * At this point, nat_ref is 1, doing "--" would make it 0..
1793 	 */
1794 	nat->nat_ref = 0;
1795 
1796 #ifdef	IPFILTER_SYNC
1797 	if (nat->nat_sync)
1798 		ipfsync_del(nat->nat_sync);
1799 #endif
1800 
1801 	if (nat->nat_fr != NULL)
1802 		(void)fr_derefrule(&nat->nat_fr, ifs);
1803 
1804 	if (nat->nat_hm != NULL)
1805 		fr_hostmapdel(&nat->nat_hm);
1806 
1807 	/*
1808 	 * If there is an active reference from the nat entry to its parent
1809 	 * rule, decrement the rule's reference count and free it too if no
1810 	 * longer being used.
1811 	 */
1812 	ipn = nat->nat_ptr;
1813 	if (ipn != NULL) {
1814 		ipn->in_space++;
1815 		ipn->in_use--;
1816 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1817 			if (ipn->in_apr)
1818 				appr_free(ipn->in_apr);
1819 			KFREE(ipn);
1820 			ifs->ifs_nat_stats.ns_rules--;
1821 		}
1822 	}
1823 
1824 	MUTEX_DESTROY(&nat->nat_lock);
1825 
1826 	aps_free(nat->nat_aps, ifs);
1827 	ifs->ifs_nat_stats.ns_inuse--;
1828 
1829 	/*
1830 	 * If there's a fragment table entry too for this nat entry, then
1831 	 * dereference that as well.  This is after nat_lock is released
1832 	 * because of Tru64.
1833 	 */
1834 	fr_forgetnat((void *)nat, ifs);
1835 
1836 	KFREE(nat);
1837 }
1838 
1839 
1840 /* ------------------------------------------------------------------------ */
1841 /* Function:    nat_flushtable                                              */
1842 /* Returns:     int - number of NAT rules deleted                           */
1843 /* Parameters:  Nil                                                         */
1844 /*                                                                          */
1845 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1846 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1847 /* ------------------------------------------------------------------------ */
1848 /*
1849  * nat_flushtable - clear the NAT table of all mapping entries.
1850  */
1851 static int nat_flushtable(ifs)
1852 ipf_stack_t *ifs;
1853 {
1854 	nat_t *nat;
1855 	int j = 0;
1856 
1857 	/*
1858 	 * ALL NAT mappings deleted, so lets just make the deletions
1859 	 * quicker.
1860 	 */
1861 	if (ifs->ifs_nat_table[0] != NULL)
1862 		bzero((char *)ifs->ifs_nat_table[0],
1863 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
1864 	if (ifs->ifs_nat_table[1] != NULL)
1865 		bzero((char *)ifs->ifs_nat_table[1],
1866 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
1867 
1868 	while ((nat = ifs->ifs_nat_instances) != NULL) {
1869 		nat_delete(nat, NL_FLUSH, ifs);
1870 		j++;
1871 	}
1872 
1873 	return j;
1874 }
1875 
1876 
1877 /* ------------------------------------------------------------------------ */
1878 /* Function:    nat_clearlist                                               */
1879 /* Returns:     int - number of NAT/RDR rules deleted                       */
1880 /* Parameters:  Nil                                                         */
1881 /*                                                                          */
1882 /* Delete all rules in the current list of rules.  There is nothing elegant */
1883 /* about this cleanup: simply free all entries on the list of rules and     */
1884 /* clear out the tables used for hashed NAT rule lookups.                   */
1885 /* ------------------------------------------------------------------------ */
1886 static int nat_clearlist(ifs)
1887 ipf_stack_t *ifs;
1888 {
1889 	ipnat_t *n, **np = &ifs->ifs_nat_list;
1890 	int i = 0;
1891 
1892 	if (ifs->ifs_nat_rules != NULL)
1893 		bzero((char *)ifs->ifs_nat_rules,
1894 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
1895 	if (ifs->ifs_rdr_rules != NULL)
1896 		bzero((char *)ifs->ifs_rdr_rules,
1897 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
1898 
1899 	while ((n = *np) != NULL) {
1900 		*np = n->in_next;
1901 		if (n->in_use == 0) {
1902 			if (n->in_apr != NULL)
1903 				appr_free(n->in_apr);
1904 			KFREE(n);
1905 			ifs->ifs_nat_stats.ns_rules--;
1906 		} else {
1907 			n->in_flags |= IPN_DELETE;
1908 			n->in_next = NULL;
1909 		}
1910 		i++;
1911 	}
1912 	ifs->ifs_nat_masks = 0;
1913 	ifs->ifs_rdr_masks = 0;
1914 	return i;
1915 }
1916 
1917 
1918 /* ------------------------------------------------------------------------ */
1919 /* Function:    nat_newmap                                                  */
1920 /* Returns:     int - -1 == error, 0 == success                             */
1921 /* Parameters:  fin(I) - pointer to packet information                      */
1922 /*              nat(I) - pointer to NAT entry                               */
1923 /*              ni(I)  - pointer to structure with misc. information needed */
1924 /*                       to create new NAT entry.                           */
1925 /*                                                                          */
1926 /* Given an empty NAT structure, populate it with new information about a   */
1927 /* new NAT session, as defined by the matching NAT rule.                    */
1928 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1929 /* to the new IP address for the translation.                               */
1930 /* ------------------------------------------------------------------------ */
1931 static INLINE int nat_newmap(fin, nat, ni)
1932 fr_info_t *fin;
1933 nat_t *nat;
1934 natinfo_t *ni;
1935 {
1936 	u_short st_port, dport, sport, port, sp, dp;
1937 	struct in_addr in, inb;
1938 	hostmap_t *hm;
1939 	u_32_t flags;
1940 	u_32_t st_ip;
1941 	ipnat_t *np;
1942 	nat_t *natl;
1943 	int l;
1944 	ipf_stack_t *ifs = fin->fin_ifs;
1945 
1946 	/*
1947 	 * If it's an outbound packet which doesn't match any existing
1948 	 * record, then create a new port
1949 	 */
1950 	l = 0;
1951 	hm = NULL;
1952 	np = ni->nai_np;
1953 	st_ip = np->in_nip;
1954 	st_port = np->in_pnext;
1955 	flags = ni->nai_flags;
1956 	sport = ni->nai_sport;
1957 	dport = ni->nai_dport;
1958 
1959 	/*
1960 	 * Do a loop until we either run out of entries to try or we find
1961 	 * a NAT mapping that isn't currently being used.  This is done
1962 	 * because the change to the source is not (usually) being fixed.
1963 	 */
1964 	do {
1965 		port = 0;
1966 		in.s_addr = htonl(np->in_nip);
1967 		if (l == 0) {
1968 			/*
1969 			 * Check to see if there is an existing NAT
1970 			 * setup for this IP address pair.
1971 			 */
1972 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1973 					 in, 0, ifs);
1974 			if (hm != NULL)
1975 				in.s_addr = hm->hm_mapip.s_addr;
1976 		} else if ((l == 1) && (hm != NULL)) {
1977 			fr_hostmapdel(&hm);
1978 		}
1979 		in.s_addr = ntohl(in.s_addr);
1980 
1981 		nat->nat_hm = hm;
1982 
1983 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1984 			if (l > 0)
1985 				return -1;
1986 		}
1987 
1988 		if (np->in_redir == NAT_BIMAP &&
1989 		    np->in_inmsk == np->in_outmsk) {
1990 			/*
1991 			 * map the address block in a 1:1 fashion
1992 			 */
1993 			in.s_addr = np->in_outip;
1994 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1995 			in.s_addr = ntohl(in.s_addr);
1996 
1997 		} else if (np->in_redir & NAT_MAPBLK) {
1998 			if ((l >= np->in_ppip) || ((l > 0) &&
1999 			     !(flags & IPN_TCPUDP)))
2000 				return -1;
2001 			/*
2002 			 * map-block - Calculate destination address.
2003 			 */
2004 			in.s_addr = ntohl(fin->fin_saddr);
2005 			in.s_addr &= ntohl(~np->in_inmsk);
2006 			inb.s_addr = in.s_addr;
2007 			in.s_addr /= np->in_ippip;
2008 			in.s_addr &= ntohl(~np->in_outmsk);
2009 			in.s_addr += ntohl(np->in_outip);
2010 			/*
2011 			 * Calculate destination port.
2012 			 */
2013 			if ((flags & IPN_TCPUDP) &&
2014 			    (np->in_ppip != 0)) {
2015 				port = ntohs(sport) + l;
2016 				port %= np->in_ppip;
2017 				port += np->in_ppip *
2018 					(inb.s_addr % np->in_ippip);
2019 				port += MAPBLK_MINPORT;
2020 				port = htons(port);
2021 			}
2022 
2023 		} else if ((np->in_outip == 0) &&
2024 			   (np->in_outmsk == 0xffffffff)) {
2025 			/*
2026 			 * 0/32 - use the interface's IP address.
2027 			 */
2028 			if ((l > 0) ||
2029 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2030 				       &in, NULL, fin->fin_ifs) == -1)
2031 				return -1;
2032 			in.s_addr = ntohl(in.s_addr);
2033 
2034 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2035 			/*
2036 			 * 0/0 - use the original source address/port.
2037 			 */
2038 			if (l > 0)
2039 				return -1;
2040 			in.s_addr = ntohl(fin->fin_saddr);
2041 
2042 		} else if ((np->in_outmsk != 0xffffffff) &&
2043 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2044 			np->in_nip++;
2045 
2046 		natl = NULL;
2047 
2048 		if ((flags & IPN_TCPUDP) &&
2049 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2050 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2051 			/*
2052 			 * "ports auto" (without map-block)
2053 			 */
2054 			if ((l > 0) && (l % np->in_ppip == 0)) {
2055 				if (l > np->in_space) {
2056 					return -1;
2057 				} else if ((l > np->in_ppip) &&
2058 					   np->in_outmsk != 0xffffffff)
2059 					np->in_nip++;
2060 			}
2061 			if (np->in_ppip != 0) {
2062 				port = ntohs(sport);
2063 				port += (l % np->in_ppip);
2064 				port %= np->in_ppip;
2065 				port += np->in_ppip *
2066 					(ntohl(fin->fin_saddr) %
2067 					 np->in_ippip);
2068 				port += MAPBLK_MINPORT;
2069 				port = htons(port);
2070 			}
2071 
2072 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2073 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2074 			/*
2075 			 * Standard port translation.  Select next port.
2076 			 */
2077 			port = htons(np->in_pnext++);
2078 
2079 			if (np->in_pnext > ntohs(np->in_pmax)) {
2080 				np->in_pnext = ntohs(np->in_pmin);
2081 				if (np->in_outmsk != 0xffffffff)
2082 					np->in_nip++;
2083 			}
2084 		}
2085 
2086 		if (np->in_flags & IPN_IPRANGE) {
2087 			if (np->in_nip > ntohl(np->in_outmsk))
2088 				np->in_nip = ntohl(np->in_outip);
2089 		} else {
2090 			if ((np->in_outmsk != 0xffffffff) &&
2091 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2092 			    ntohl(np->in_outip))
2093 				np->in_nip = ntohl(np->in_outip) + 1;
2094 		}
2095 
2096 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2097 			port = sport;
2098 
2099 		/*
2100 		 * Here we do a lookup of the connection as seen from
2101 		 * the outside.  If an IP# pair already exists, try
2102 		 * again.  So if you have A->B becomes C->B, you can
2103 		 * also have D->E become C->E but not D->B causing
2104 		 * another C->B.  Also take protocol and ports into
2105 		 * account when determining whether a pre-existing
2106 		 * NAT setup will cause an external conflict where
2107 		 * this is appropriate.
2108 		 */
2109 		inb.s_addr = htonl(in.s_addr);
2110 		sp = fin->fin_data[0];
2111 		dp = fin->fin_data[1];
2112 		fin->fin_data[0] = fin->fin_data[1];
2113 		fin->fin_data[1] = htons(port);
2114 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2115 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2116 		fin->fin_data[0] = sp;
2117 		fin->fin_data[1] = dp;
2118 
2119 		/*
2120 		 * Has the search wrapped around and come back to the
2121 		 * start ?
2122 		 */
2123 		if ((natl != NULL) &&
2124 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2125 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2126 			return -1;
2127 		l++;
2128 	} while (natl != NULL);
2129 
2130 	if (np->in_space > 0)
2131 		np->in_space--;
2132 
2133 	/* Setup the NAT table */
2134 	nat->nat_inip = fin->fin_src;
2135 	nat->nat_outip.s_addr = htonl(in.s_addr);
2136 	nat->nat_oip = fin->fin_dst;
2137 	if (nat->nat_hm == NULL)
2138 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2139 					  nat->nat_outip, 0, ifs);
2140 
2141 	if (flags & IPN_TCPUDP) {
2142 		nat->nat_inport = sport;
2143 		nat->nat_outport = port;	/* sport */
2144 		nat->nat_oport = dport;
2145 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2146 	} else if (flags & IPN_ICMPQUERY) {
2147 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2148 		nat->nat_inport = port;
2149 		nat->nat_outport = port;
2150 	}
2151 
2152 	ni->nai_ip.s_addr = in.s_addr;
2153 	ni->nai_port = port;
2154 	ni->nai_nport = dport;
2155 	return 0;
2156 }
2157 
2158 
2159 /* ------------------------------------------------------------------------ */
2160 /* Function:    nat_newrdr                                                  */
2161 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2162 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2163 /* Parameters:  fin(I) - pointer to packet information                      */
2164 /*              nat(I) - pointer to NAT entry                               */
2165 /*              ni(I)  - pointer to structure with misc. information needed */
2166 /*                       to create new NAT entry.                           */
2167 /*                                                                          */
2168 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2169 /* to the new IP address for the translation.                               */
2170 /* ------------------------------------------------------------------------ */
2171 static INLINE int nat_newrdr(fin, nat, ni)
2172 fr_info_t *fin;
2173 nat_t *nat;
2174 natinfo_t *ni;
2175 {
2176 	u_short nport, dport, sport;
2177 	struct in_addr in, inb;
2178 	u_short sp, dp;
2179 	hostmap_t *hm;
2180 	u_32_t flags;
2181 	ipnat_t *np;
2182 	nat_t *natl;
2183 	int move;
2184 	ipf_stack_t *ifs = fin->fin_ifs;
2185 
2186 	move = 1;
2187 	hm = NULL;
2188 	in.s_addr = 0;
2189 	np = ni->nai_np;
2190 	flags = ni->nai_flags;
2191 	sport = ni->nai_sport;
2192 	dport = ni->nai_dport;
2193 
2194 	/*
2195 	 * If the matching rule has IPN_STICKY set, then we want to have the
2196 	 * same rule kick in as before.  Why would this happen?  If you have
2197 	 * a collection of rdr rules with "round-robin sticky", the current
2198 	 * packet might match a different one to the previous connection but
2199 	 * we want the same destination to be used.
2200 	 */
2201 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2202 	    (IPN_ROUNDR|IPN_STICKY)) {
2203 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2204 				 (u_32_t)dport, ifs);
2205 		if (hm != NULL) {
2206 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2207 			np = hm->hm_ipnat;
2208 			ni->nai_np = np;
2209 			move = 0;
2210 		}
2211 	}
2212 
2213 	/*
2214 	 * Otherwise, it's an inbound packet. Most likely, we don't
2215 	 * want to rewrite source ports and source addresses. Instead,
2216 	 * we want to rewrite to a fixed internal address and fixed
2217 	 * internal port.
2218 	 */
2219 	if (np->in_flags & IPN_SPLIT) {
2220 		in.s_addr = np->in_nip;
2221 
2222 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2223 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2224 					 in, (u_32_t)dport, ifs);
2225 			if (hm != NULL) {
2226 				in.s_addr = hm->hm_mapip.s_addr;
2227 				move = 0;
2228 			}
2229 		}
2230 
2231 		if (hm == NULL || hm->hm_ref == 1) {
2232 			if (np->in_inip == htonl(in.s_addr)) {
2233 				np->in_nip = ntohl(np->in_inmsk);
2234 				move = 0;
2235 			} else {
2236 				np->in_nip = ntohl(np->in_inip);
2237 			}
2238 		}
2239 
2240 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2241 		/*
2242 		 * 0/32 - use the interface's IP address.
2243 		 */
2244 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2245 			   fin->fin_ifs) == -1)
2246 			return -1;
2247 		in.s_addr = ntohl(in.s_addr);
2248 
2249 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2250 		/*
2251 		 * 0/0 - use the original destination address/port.
2252 		 */
2253 		in.s_addr = ntohl(fin->fin_daddr);
2254 
2255 	} else if (np->in_redir == NAT_BIMAP &&
2256 		   np->in_inmsk == np->in_outmsk) {
2257 		/*
2258 		 * map the address block in a 1:1 fashion
2259 		 */
2260 		in.s_addr = np->in_inip;
2261 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2262 		in.s_addr = ntohl(in.s_addr);
2263 	} else {
2264 		in.s_addr = ntohl(np->in_inip);
2265 	}
2266 
2267 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2268 		nport = dport;
2269 	else {
2270 		/*
2271 		 * Whilst not optimized for the case where
2272 		 * pmin == pmax, the gain is not significant.
2273 		 */
2274 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2275 		    (np->in_pmin != np->in_pmax)) {
2276 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2277 				ntohs(np->in_pnext);
2278 			nport = htons(nport);
2279 		} else
2280 			nport = np->in_pnext;
2281 	}
2282 
2283 	/*
2284 	 * When the redirect-to address is set to 0.0.0.0, just
2285 	 * assume a blank `forwarding' of the packet.  We don't
2286 	 * setup any translation for this either.
2287 	 */
2288 	if (in.s_addr == 0) {
2289 		if (nport == dport)
2290 			return -1;
2291 		in.s_addr = ntohl(fin->fin_daddr);
2292 	}
2293 
2294 	/*
2295 	 * Check to see if this redirect mapping already exists and if
2296 	 * it does, return "failure" (allowing it to be created will just
2297 	 * cause one or both of these "connections" to stop working.)
2298 	 */
2299 	inb.s_addr = htonl(in.s_addr);
2300 	sp = fin->fin_data[0];
2301 	dp = fin->fin_data[1];
2302 	fin->fin_data[1] = fin->fin_data[0];
2303 	fin->fin_data[0] = ntohs(nport);
2304 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2305 		    (u_int)fin->fin_p, inb, fin->fin_src);
2306 	fin->fin_data[0] = sp;
2307 	fin->fin_data[1] = dp;
2308 	if (natl != NULL)
2309 		return (-1);
2310 
2311 	nat->nat_inip.s_addr = htonl(in.s_addr);
2312 	nat->nat_outip = fin->fin_dst;
2313 	nat->nat_oip = fin->fin_src;
2314 
2315 	ni->nai_ip.s_addr = in.s_addr;
2316 	ni->nai_nport = nport;
2317 	ni->nai_port = sport;
2318 
2319 	if (flags & IPN_TCPUDP) {
2320 		nat->nat_inport = nport;
2321 		nat->nat_outport = dport;
2322 		nat->nat_oport = sport;
2323 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2324 	} else if (flags & IPN_ICMPQUERY) {
2325 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2326 		nat->nat_inport = nport;
2327 		nat->nat_outport = nport;
2328 	}
2329 
2330 	return move;
2331 }
2332 
2333 /* ------------------------------------------------------------------------ */
2334 /* Function:    nat_new                                                     */
2335 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2336 /*                       else pointer to new NAT structure                  */
2337 /* Parameters:  fin(I)       - pointer to packet information                */
2338 /*              np(I)        - pointer to NAT rule                          */
2339 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2340 /*              flags(I)     - flags describing the current packet          */
2341 /*              direction(I) - direction of packet (in/out)                 */
2342 /* Write Lock:  ipf_nat                                                     */
2343 /*                                                                          */
2344 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2345 /* in any way.                                                              */
2346 /*                                                                          */
2347 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2348 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2349 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2350 /* and (3) building that structure and putting it into the NAT table(s).    */
2351 /* ------------------------------------------------------------------------ */
2352 nat_t *nat_new(fin, np, natsave, flags, direction)
2353 fr_info_t *fin;
2354 ipnat_t *np;
2355 nat_t **natsave;
2356 u_int flags;
2357 int direction;
2358 {
2359 	tcphdr_t *tcp = NULL;
2360 	hostmap_t *hm = NULL;
2361 	nat_t *nat, *natl;
2362 	u_int nflags;
2363 	natinfo_t ni;
2364 	int move;
2365 	ipf_stack_t *ifs = fin->fin_ifs;
2366 
2367 	/*
2368 	 * Trigger automatic call to nat_extraflush() if the
2369 	 * table has reached capcity specified by hi watermark.
2370 	 */
2371 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2372 		ifs->ifs_nat_doflush = 1;
2373 
2374 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2375 		ifs->ifs_nat_stats.ns_memfail++;
2376 		return NULL;
2377 	}
2378 
2379 	move = 1;
2380 	nflags = np->in_flags & flags;
2381 	nflags &= NAT_FROMRULE;
2382 
2383 	ni.nai_np = np;
2384 	ni.nai_nflags = nflags;
2385 	ni.nai_flags = flags;
2386 
2387 	/* Give me a new nat */
2388 	KMALLOC(nat, nat_t *);
2389 	if (nat == NULL) {
2390 		ifs->ifs_nat_stats.ns_memfail++;
2391 		/*
2392 		 * Try to automatically tune the max # of entries in the
2393 		 * table allowed to be less than what will cause kmem_alloc()
2394 		 * to fail and try to eliminate panics due to out of memory
2395 		 * conditions arising.
2396 		 */
2397 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2398 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2399 			printf("ipf_nattable_max reduced to %d\n",
2400 				ifs->ifs_ipf_nattable_max);
2401 		}
2402 		return NULL;
2403 	}
2404 
2405 	if (flags & IPN_TCPUDP) {
2406 		tcp = fin->fin_dp;
2407 		ni.nai_sport = htons(fin->fin_sport);
2408 		ni.nai_dport = htons(fin->fin_dport);
2409 	} else if (flags & IPN_ICMPQUERY) {
2410 		/*
2411 		 * In the ICMP query NAT code, we translate the ICMP id fields
2412 		 * to make them unique. This is indepedent of the ICMP type
2413 		 * (e.g. in the unlikely event that a host sends an echo and
2414 		 * an tstamp request with the same id, both packets will have
2415 		 * their ip address/id field changed in the same way).
2416 		 */
2417 		/* The icmp_id field is used by the sender to identify the
2418 		 * process making the icmp request. (the receiver justs
2419 		 * copies it back in its response). So, it closely matches
2420 		 * the concept of source port. We overlay sport, so we can
2421 		 * maximally reuse the existing code.
2422 		 */
2423 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2424 		ni.nai_dport = ni.nai_sport;
2425 	}
2426 
2427 	bzero((char *)nat, sizeof(*nat));
2428 	nat->nat_flags = flags;
2429 	nat->nat_redir = np->in_redir;
2430 
2431 	if ((flags & NAT_SLAVE) == 0) {
2432 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2433 	}
2434 
2435 	/*
2436 	 * Search the current table for a match.
2437 	 */
2438 	if (direction == NAT_OUTBOUND) {
2439 		/*
2440 		 * We can now arrange to call this for the same connection
2441 		 * because ipf_nat_new doesn't protect the code path into
2442 		 * this function.
2443 		 */
2444 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2445 				     fin->fin_src, fin->fin_dst);
2446 		if (natl != NULL) {
2447 			KFREE(nat);
2448 			nat = natl;
2449 			goto done;
2450 		}
2451 
2452 		move = nat_newmap(fin, nat, &ni);
2453 		if (move == -1)
2454 			goto badnat;
2455 
2456 		np = ni.nai_np;
2457 	} else {
2458 		/*
2459 		 * NAT_INBOUND is used only for redirects rules
2460 		 */
2461 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2462 				    fin->fin_src, fin->fin_dst);
2463 		if (natl != NULL) {
2464 			KFREE(nat);
2465 			nat = natl;
2466 			goto done;
2467 		}
2468 
2469 		move = nat_newrdr(fin, nat, &ni);
2470 		if (move == -1)
2471 			goto badnat;
2472 
2473 		np = ni.nai_np;
2474 	}
2475 
2476 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2477 		if (np->in_redir == NAT_REDIRECT) {
2478 			nat_delrdr(np);
2479 			nat_addrdr(np, ifs);
2480 		} else if (np->in_redir == NAT_MAP) {
2481 			nat_delnat(np);
2482 			nat_addnat(np, ifs);
2483 		}
2484 	}
2485 
2486 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2487 		goto badnat;
2488 	}
2489 
2490 	nat_calc_chksum_diffs(nat);
2491 
2492 	if (flags & SI_WILDP)
2493 		ifs->ifs_nat_stats.ns_wilds++;
2494 	goto done;
2495 badnat:
2496 	ifs->ifs_nat_stats.ns_badnat++;
2497 	if ((hm = nat->nat_hm) != NULL)
2498 		fr_hostmapdel(&hm);
2499 	KFREE(nat);
2500 	nat = NULL;
2501 done:
2502 	if ((flags & NAT_SLAVE) == 0) {
2503 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2504 	}
2505 	return nat;
2506 }
2507 
2508 
2509 /* ------------------------------------------------------------------------ */
2510 /* Function:    nat_finalise                                                */
2511 /* Returns:     int - 0 == sucess, -1 == failure                            */
2512 /* Parameters:  fin(I) - pointer to packet information                      */
2513 /*              nat(I) - pointer to NAT entry                               */
2514 /*              ni(I)  - pointer to structure with misc. information needed */
2515 /*                       to create new NAT entry.                           */
2516 /* Write Lock:  ipf_nat                                                     */
2517 /*                                                                          */
2518 /* This is the tail end of constructing a new NAT entry and is the same     */
2519 /* for both IPv4 and IPv6.                                                  */
2520 /* ------------------------------------------------------------------------ */
2521 /*ARGSUSED*/
2522 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2523 fr_info_t *fin;
2524 nat_t *nat;
2525 natinfo_t *ni;
2526 tcphdr_t *tcp;
2527 nat_t **natsave;
2528 int direction;
2529 {
2530 	frentry_t *fr;
2531 	ipnat_t *np;
2532 	ipf_stack_t *ifs = fin->fin_ifs;
2533 
2534 	np = ni->nai_np;
2535 
2536 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2537 
2538 #ifdef	IPFILTER_SYNC
2539 	if ((nat->nat_flags & SI_CLONE) == 0)
2540 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2541 #endif
2542 
2543 	nat->nat_me = natsave;
2544 	nat->nat_dir = direction;
2545 	nat->nat_ifps[0] = np->in_ifps[0];
2546 	nat->nat_ifps[1] = np->in_ifps[1];
2547 	nat->nat_ptr = np;
2548 	nat->nat_p = fin->fin_p;
2549 	nat->nat_mssclamp = np->in_mssclamp;
2550 	fr = fin->fin_fr;
2551 	nat->nat_fr = fr;
2552 
2553 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2554 		if (appr_new(fin, nat) == -1)
2555 			return -1;
2556 
2557 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2558 		if (ifs->ifs_nat_logging)
2559 			nat_log(nat, (u_int)np->in_redir, ifs);
2560 		np->in_use++;
2561 		if (fr != NULL) {
2562 			MUTEX_ENTER(&fr->fr_lock);
2563 			fr->fr_ref++;
2564 			MUTEX_EXIT(&fr->fr_lock);
2565 		}
2566 		return 0;
2567 	}
2568 
2569 	/*
2570 	 * nat_insert failed, so cleanup time...
2571 	 */
2572 	return -1;
2573 }
2574 
2575 
2576 /* ------------------------------------------------------------------------ */
2577 /* Function:   nat_insert                                                   */
2578 /* Returns:    int - 0 == sucess, -1 == failure                             */
2579 /* Parameters: nat(I) - pointer to NAT structure                            */
2580 /*             rev(I) - flag indicating forward/reverse direction of packet */
2581 /* Write Lock: ipf_nat                                                      */
2582 /*                                                                          */
2583 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2584 /* list of active NAT entries.  Adjust global counters when complete.       */
2585 /* ------------------------------------------------------------------------ */
2586 int	nat_insert(nat, rev, ifs)
2587 nat_t	*nat;
2588 int	rev;
2589 ipf_stack_t *ifs;
2590 {
2591 	u_int hv1, hv2;
2592 	nat_t **natp;
2593 
2594 	/*
2595 	 * Try and return an error as early as possible, so calculate the hash
2596 	 * entry numbers first and then proceed.
2597 	 */
2598 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2599 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2600 				  0xffffffff);
2601 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2602 				  ifs->ifs_ipf_nattable_sz);
2603 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2604 				  0xffffffff);
2605 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2606 				  ifs->ifs_ipf_nattable_sz);
2607 	} else {
2608 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2609 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2610 				  ifs->ifs_ipf_nattable_sz);
2611 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2612 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2613 				  ifs->ifs_ipf_nattable_sz);
2614 	}
2615 
2616 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2617 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2618 		return -1;
2619 	}
2620 
2621 	nat->nat_hv[0] = hv1;
2622 	nat->nat_hv[1] = hv2;
2623 
2624 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2625 
2626 	nat->nat_rev = rev;
2627 	nat->nat_ref = 1;
2628 	nat->nat_bytes[0] = 0;
2629 	nat->nat_pkts[0] = 0;
2630 	nat->nat_bytes[1] = 0;
2631 	nat->nat_pkts[1] = 0;
2632 
2633 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2634 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2635 
2636 	if (nat->nat_ifnames[1][0] !='\0') {
2637 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2638 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2639 	} else {
2640 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2641 			       LIFNAMSIZ);
2642 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2643 		nat->nat_ifps[1] = nat->nat_ifps[0];
2644 	}
2645 
2646 	nat->nat_next = ifs->ifs_nat_instances;
2647 	nat->nat_pnext = &ifs->ifs_nat_instances;
2648 	if (ifs->ifs_nat_instances)
2649 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2650 	ifs->ifs_nat_instances = nat;
2651 
2652 	natp = &ifs->ifs_nat_table[0][hv1];
2653 	if (*natp)
2654 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2655 	nat->nat_phnext[0] = natp;
2656 	nat->nat_hnext[0] = *natp;
2657 	*natp = nat;
2658 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2659 
2660 	natp = &ifs->ifs_nat_table[1][hv2];
2661 	if (*natp)
2662 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2663 	nat->nat_phnext[1] = natp;
2664 	nat->nat_hnext[1] = *natp;
2665 	*natp = nat;
2666 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2667 
2668 	fr_setnatqueue(nat, rev, ifs);
2669 
2670 	ifs->ifs_nat_stats.ns_added++;
2671 	ifs->ifs_nat_stats.ns_inuse++;
2672 	return 0;
2673 }
2674 
2675 
2676 /* ------------------------------------------------------------------------ */
2677 /* Function:    nat_icmperrorlookup                                         */
2678 /* Returns:     nat_t* - point to matching NAT structure                    */
2679 /* Parameters:  fin(I) - pointer to packet information                      */
2680 /*              dir(I) - direction of packet (in/out)                       */
2681 /*                                                                          */
2682 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2683 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2684 /* the required length.                                                     */
2685 /* ------------------------------------------------------------------------ */
2686 nat_t *nat_icmperrorlookup(fin, dir)
2687 fr_info_t *fin;
2688 int dir;
2689 {
2690 	int flags = 0, minlen;
2691 	icmphdr_t *orgicmp;
2692 	tcphdr_t *tcp = NULL;
2693 	u_short data[2];
2694 	nat_t *nat;
2695 	ip_t *oip;
2696 	u_int p;
2697 
2698 	/*
2699 	 * Does it at least have the return (basic) IP header ?
2700 	 * Only a basic IP header (no options) should be with an ICMP error
2701 	 * header.  Also, if it's not an error type, then return.
2702 	 */
2703 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2704 		return NULL;
2705 
2706 	/*
2707 	 * Check packet size
2708 	 */
2709 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2710 	minlen = IP_HL(oip) << 2;
2711 	if ((minlen < sizeof(ip_t)) ||
2712 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2713 		return NULL;
2714 	/*
2715 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2716 	 * header claimed in the encapsulated part which is of concern.  It
2717 	 * may be too big to be in this buffer but not so big that it's
2718 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2719 	 * This is possible because we don't know how big oip_hl is when we
2720 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2721 	 * all here now.
2722 	 */
2723 #ifdef  _KERNEL
2724 	{
2725 	mb_t *m;
2726 
2727 	m = fin->fin_m;
2728 # if defined(MENTAT)
2729 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2730 		return NULL;
2731 # else
2732 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2733 	    (char *)fin->fin_ip + M_LEN(m))
2734 		return NULL;
2735 # endif
2736 	}
2737 #endif
2738 
2739 	if (fin->fin_daddr != oip->ip_src.s_addr)
2740 		return NULL;
2741 
2742 	p = oip->ip_p;
2743 	if (p == IPPROTO_TCP)
2744 		flags = IPN_TCP;
2745 	else if (p == IPPROTO_UDP)
2746 		flags = IPN_UDP;
2747 	else if (p == IPPROTO_ICMP) {
2748 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2749 
2750 		/* see if this is related to an ICMP query */
2751 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2752 			data[0] = fin->fin_data[0];
2753 			data[1] = fin->fin_data[1];
2754 			fin->fin_data[0] = 0;
2755 			fin->fin_data[1] = orgicmp->icmp_id;
2756 
2757 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2758 			/*
2759 			 * NOTE : dir refers to the direction of the original
2760 			 *        ip packet. By definition the icmp error
2761 			 *        message flows in the opposite direction.
2762 			 */
2763 			if (dir == NAT_INBOUND)
2764 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2765 						   oip->ip_src);
2766 			else
2767 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2768 						    oip->ip_src);
2769 			fin->fin_data[0] = data[0];
2770 			fin->fin_data[1] = data[1];
2771 			return nat;
2772 		}
2773 	}
2774 
2775 	if (flags & IPN_TCPUDP) {
2776 		minlen += 8;		/* + 64bits of data to get ports */
2777 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2778 			return NULL;
2779 
2780 		data[0] = fin->fin_data[0];
2781 		data[1] = fin->fin_data[1];
2782 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2783 		fin->fin_data[0] = ntohs(tcp->th_dport);
2784 		fin->fin_data[1] = ntohs(tcp->th_sport);
2785 
2786 		if (dir == NAT_INBOUND) {
2787 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2788 					   oip->ip_src);
2789 		} else {
2790 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2791 					    oip->ip_src);
2792 		}
2793 		fin->fin_data[0] = data[0];
2794 		fin->fin_data[1] = data[1];
2795 		return nat;
2796 	}
2797 	if (dir == NAT_INBOUND)
2798 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2799 	else
2800 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2801 }
2802 
2803 
2804 /* ------------------------------------------------------------------------ */
2805 /* Function:    nat_icmperror                                               */
2806 /* Returns:     nat_t* - point to matching NAT structure                    */
2807 /* Parameters:  fin(I)    - pointer to packet information                   */
2808 /*              nflags(I) - NAT flags for this packet                       */
2809 /*              dir(I)    - direction of packet (in/out)                    */
2810 /*                                                                          */
2811 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2812 /* session.  This will correct both packet header data and checksums.       */
2813 /*                                                                          */
2814 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2815 /* a NAT'd ICMP packet gets correctly recognised.                           */
2816 /* ------------------------------------------------------------------------ */
2817 nat_t *nat_icmperror(fin, nflags, dir)
2818 fr_info_t *fin;
2819 u_int *nflags;
2820 int dir;
2821 {
2822 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2823 	struct in_addr in;
2824 	icmphdr_t *icmp, *orgicmp;
2825 	int dlen;
2826 	udphdr_t *udp;
2827 	tcphdr_t *tcp;
2828 	nat_t *nat;
2829 	ip_t *oip;
2830 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
2831 		return NULL;
2832 
2833 	/*
2834 	 * nat_icmperrorlookup() looks up nat entry associated with the
2835 	 * offending IP packet and returns pointer to the entry, or NULL
2836 	 * if packet wasn't natted or for `defective' packets.
2837 	 */
2838 
2839 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
2840 		return NULL;
2841 
2842 	sumd2 = 0;
2843 	*nflags = IPN_ICMPERR;
2844 	icmp = fin->fin_dp;
2845 	oip = (ip_t *)&icmp->icmp_ip;
2846 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
2847 	tcp = (tcphdr_t *)udp;
2848 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
2849 
2850 	/*
2851 	 * Need to adjust ICMP header to include the real IP#'s and
2852 	 * port #'s.  There are three steps required.
2853 	 *
2854 	 * Step 1
2855 	 * Fix the IP addresses in the offending IP packet and update
2856 	 * ip header checksum to compensate for the change.
2857 	 *
2858 	 * No update needed here for icmp_cksum because the ICMP checksum
2859 	 * is calculated over the complete ICMP packet, which includes the
2860 	 * changed oip IP addresses and oip->ip_sum.  These two changes
2861 	 * cancel each other out (if the delta for the IP address is x,
2862 	 * then the delta for ip_sum is minus x).
2863 	 */
2864 
2865 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
2866 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
2867 		in = nat->nat_inip;
2868 		oip->ip_src = in;
2869 	} else {
2870 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
2871 		in = nat->nat_outip;
2872 		oip->ip_dst = in;
2873 	}
2874 
2875 	sum2 = LONG_SUM(ntohl(in.s_addr));
2876 	CALC_SUMD(sum1, sum2, sumd);
2877 	fix_datacksum(&oip->ip_sum, sumd);
2878 
2879 	/*
2880 	 * Step 2
2881 	 * Perform other adjustments based on protocol of offending packet.
2882 	 */
2883 
2884 	switch (oip->ip_p) {
2885 		case IPPROTO_TCP :
2886 		case IPPROTO_UDP :
2887 
2888 			/*
2889 			* For offending TCP/UDP IP packets, translate the ports
2890 			* based on the NAT specification.
2891 			*
2892 			* Advance notice : Now it becomes complicated :-)
2893 			*
2894 			* Since the port and IP addresse fields are both part
2895 			* of the TCP/UDP checksum of the offending IP packet,
2896 			* we need to adjust that checksum as well.
2897 			*
2898 			* To further complicate things, the TCP/UDP checksum
2899 			* may not be present.  We must check to see if the
2900 			* length of the data portion is big enough to hold
2901 			* the checksum.  In the UDP case, a test to determine
2902 			* if the checksum is even set is also required.
2903 			*
2904 			* Any changes to an IP address, port or checksum within
2905 			* the ICMP packet requires a change to icmp_cksum.
2906 			*
2907 			* Be extremely careful here ... The change is dependent
2908 			* upon whether or not the TCP/UPD checksum is present.
2909 			*
2910 			* If TCP/UPD checksum is present, the icmp_cksum must
2911 			* compensate for checksum modification resulting from
2912 			* IP address change only.  Port change and resulting
2913 			* data checksum adjustments cancel each other out.
2914 			*
2915 			* If TCP/UDP checksum is not present, icmp_cksum must
2916 			* compensate for port change only.  The IP address
2917 			* change does not modify anything else in this case.
2918 			*/
2919 
2920 			psum1 = 0;
2921 			psum2 = 0;
2922 			psumd = 0;
2923 
2924 			if ((tcp->th_dport == nat->nat_oport) &&
2925 			    (tcp->th_sport != nat->nat_inport)) {
2926 
2927 				/*
2928 				 * Translate the source port.
2929 				 */
2930 
2931 				psum1 = ntohs(tcp->th_sport);
2932 				psum2 = ntohs(nat->nat_inport);
2933 				tcp->th_sport = nat->nat_inport;
2934 
2935 			} else if ((tcp->th_sport == nat->nat_oport) &&
2936 				    (tcp->th_dport != nat->nat_outport)) {
2937 
2938 				/*
2939 				 * Translate the destination port.
2940 				 */
2941 
2942 				psum1 = ntohs(tcp->th_dport);
2943 				psum2 = ntohs(nat->nat_outport);
2944 				tcp->th_dport = nat->nat_outport;
2945 			}
2946 
2947 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
2948 
2949 				/*
2950 				 * TCP checksum present.
2951 				 *
2952 				 * Adjust data checksum and icmp checksum to
2953 				 * compensate for any IP address change.
2954 				 */
2955 
2956 				sum1 = ntohs(tcp->th_sum);
2957 				fix_datacksum(&tcp->th_sum, sumd);
2958 				sum2 = ntohs(tcp->th_sum);
2959 				sumd2 = sumd << 1;
2960 				CALC_SUMD(sum1, sum2, sumd);
2961 				sumd2 += sumd;
2962 
2963 				/*
2964 				 * Also make data checksum adjustment to
2965 				 * compensate for any port change.
2966 				 */
2967 
2968 				if (psum1 != psum2) {
2969 					CALC_SUMD(psum1, psum2, psumd);
2970 					fix_datacksum(&tcp->th_sum, psumd);
2971 				}
2972 
2973 			} else if ((oip->ip_p == IPPROTO_UDP) &&
2974 				   (dlen >= 8) && (udp->uh_sum != 0)) {
2975 
2976 				/*
2977 				 * The UDP checksum is present and set.
2978 				 *
2979 				 * Adjust data checksum and icmp checksum to
2980 				 * compensate for any IP address change.
2981 				 */
2982 
2983 				sum1 = ntohs(udp->uh_sum);
2984 				fix_datacksum(&udp->uh_sum, sumd);
2985 				sum2 = ntohs(udp->uh_sum);
2986 				sumd2 = sumd << 1;
2987 				CALC_SUMD(sum1, sum2, sumd);
2988 				sumd2 += sumd;
2989 
2990 				/*
2991 				 * Also make data checksum adjustment to
2992 				 * compensate for any port change.
2993 				 */
2994 
2995 				if (psum1 != psum2) {
2996 					CALC_SUMD(psum1, psum2, psumd);
2997 					fix_datacksum(&udp->uh_sum, psumd);
2998 				}
2999 
3000 			} else {
3001 
3002 				/*
3003 				 * Data checksum was not present.
3004 				 *
3005 				 * Compensate for any port change.
3006 				 */
3007 
3008 				CALC_SUMD(psum2, psum1, psumd);
3009 				sumd2 += psumd;
3010 			}
3011 			break;
3012 
3013 		case IPPROTO_ICMP :
3014 
3015 			orgicmp = (icmphdr_t *)udp;
3016 
3017 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3018 			    (orgicmp->icmp_id != nat->nat_inport) &&
3019 			    (dlen >= 8)) {
3020 
3021 				/*
3022 				 * Fix ICMP checksum (of the offening ICMP
3023 				 * query packet) to compensate the change
3024 				 * in the ICMP id of the offending ICMP
3025 				 * packet.
3026 				 *
3027 				 * Since you modify orgicmp->icmp_id with
3028 				 * a delta (say x) and you compensate that
3029 				 * in origicmp->icmp_cksum with a delta
3030 				 * minus x, you don't have to adjust the
3031 				 * overall icmp->icmp_cksum
3032 				 */
3033 
3034 				sum1 = ntohs(orgicmp->icmp_id);
3035 				sum2 = ntohs(nat->nat_inport);
3036 				CALC_SUMD(sum1, sum2, sumd);
3037 				orgicmp->icmp_id = nat->nat_inport;
3038 				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3039 
3040 			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3041 
3042 			break;
3043 
3044 		default :
3045 
3046 			break;
3047 
3048 	} /* switch (oip->ip_p) */
3049 
3050 	/*
3051 	 * Step 3
3052 	 * Make the adjustments to icmp checksum.
3053 	 */
3054 
3055 	if (sumd2 != 0) {
3056 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3057 		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3058 		fix_incksum(&icmp->icmp_cksum, sumd2);
3059 	}
3060 	return nat;
3061 }
3062 
3063 
/*
 * NB: these lookups don't lock access to the list; it is assumed that
 * this has already been done!
 */
3068 
3069 /* ------------------------------------------------------------------------ */
3070 /* Function:    nat_inlookup                                                */
3071 /* Returns:     nat_t* - NULL == no match,                                  */
3072 /*                       else pointer to matching NAT entry                 */
3073 /* Parameters:  fin(I)    - pointer to packet information                   */
3074 /*              flags(I)  - NAT flags for this packet                       */
3075 /*              p(I)      - protocol for this packet                        */
3076 /*              src(I)    - source IP address                               */
3077 /*              mapdst(I) - destination IP address                          */
3078 /*                                                                          */
3079 /* Lookup a nat entry based on the mapped destination ip address/port and   */
3080 /* real source address/port.  We use this lookup when receiving a packet,   */
3081 /* we're looking for a table entry, based on the destination address.       */
3082 /*                                                                          */
3083 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3084 /*                                                                          */
3085 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3086 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3087 /*                                                                          */
3088 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3089 /*            the packet is of said protocol                                */
3090 /* ------------------------------------------------------------------------ */
3091 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3092 fr_info_t *fin;
3093 u_int flags, p;
3094 struct in_addr src , mapdst;
3095 {
3096 	u_short sport, dport;
3097 	ipnat_t *ipn;
3098 	u_int sflags;
3099 	nat_t *nat;
3100 	int nflags;
3101 	u_32_t dst;
3102 	void *ifp;
3103 	u_int hv;
3104 	ipf_stack_t *ifs = fin->fin_ifs;
3105 
3106 	if (fin != NULL)
3107 		ifp = fin->fin_ifp;
3108 	else
3109 		ifp = NULL;
3110 	sport = 0;
3111 	dport = 0;
3112 	dst = mapdst.s_addr;
3113 	sflags = flags & NAT_TCPUDPICMP;
3114 
3115 	switch (p)
3116 	{
3117 	case IPPROTO_TCP :
3118 	case IPPROTO_UDP :
3119 		sport = htons(fin->fin_data[0]);
3120 		dport = htons(fin->fin_data[1]);
3121 		break;
3122 	case IPPROTO_ICMP :
3123 		if (flags & IPN_ICMPERR)
3124 			sport = fin->fin_data[1];
3125 		else
3126 			dport = fin->fin_data[1];
3127 		break;
3128 	default :
3129 		break;
3130 	}
3131 
3132 
3133 	if ((flags & SI_WILDP) != 0)
3134 		goto find_in_wild_ports;
3135 
3136 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3137 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3138 	nat = ifs->ifs_nat_table[1][hv];
3139 	for (; nat; nat = nat->nat_hnext[1]) {
3140 		if (nat->nat_ifps[0] != NULL) {
3141 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3142 				continue;
3143 		} else if (ifp != NULL)
3144 			nat->nat_ifps[0] = ifp;
3145 
3146 		nflags = nat->nat_flags;
3147 
3148 		if (nat->nat_oip.s_addr == src.s_addr &&
3149 		    nat->nat_outip.s_addr == dst &&
3150 		    (((p == 0) &&
3151 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3152 		     || (p == nat->nat_p))) {
3153 			switch (p)
3154 			{
3155 #if 0
3156 			case IPPROTO_GRE :
3157 				if (nat->nat_call[1] != fin->fin_data[0])
3158 					continue;
3159 				break;
3160 #endif
3161 			case IPPROTO_ICMP :
3162 				if ((flags & IPN_ICMPERR) != 0) {
3163 					if (nat->nat_outport != sport)
3164 						continue;
3165 				} else {
3166 					if (nat->nat_outport != dport)
3167 						continue;
3168 				}
3169 				break;
3170 			case IPPROTO_TCP :
3171 			case IPPROTO_UDP :
3172 				if (nat->nat_oport != sport)
3173 					continue;
3174 				if (nat->nat_outport != dport)
3175 					continue;
3176 				break;
3177 			default :
3178 				break;
3179 			}
3180 
3181 			ipn = nat->nat_ptr;
3182 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3183 				if (appr_match(fin, nat) != 0)
3184 					continue;
3185 			return nat;
3186 		}
3187 	}
3188 
3189 	/*
3190 	 * So if we didn't find it but there are wildcard members in the hash
3191 	 * table, go back and look for them.  We do this search and update here
3192 	 * because it is modifying the NAT table and we want to do this only
3193 	 * for the first packet that matches.  The exception, of course, is
3194 	 * for "dummy" (FI_IGNORE) lookups.
3195 	 */
3196 find_in_wild_ports:
3197 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3198 		return NULL;
3199 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3200 		return NULL;
3201 
3202 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3203 
3204 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3205 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3206 
3207 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3208 
3209 	nat = ifs->ifs_nat_table[1][hv];
3210 	for (; nat; nat = nat->nat_hnext[1]) {
3211 		if (nat->nat_ifps[0] != NULL) {
3212 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3213 				continue;
3214 		} else if (ifp != NULL)
3215 			nat->nat_ifps[0] = ifp;
3216 
3217 		if (nat->nat_p != fin->fin_p)
3218 			continue;
3219 		if (nat->nat_oip.s_addr != src.s_addr ||
3220 		    nat->nat_outip.s_addr != dst)
3221 			continue;
3222 
3223 		nflags = nat->nat_flags;
3224 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3225 			continue;
3226 
3227 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3228 			       NAT_INBOUND) == 1) {
3229 			if ((fin->fin_flx & FI_IGNORE) != 0)
3230 				break;
3231 			if ((nflags & SI_CLONE) != 0) {
3232 				nat = fr_natclone(fin, nat);
3233 				if (nat == NULL)
3234 					break;
3235 			} else {
3236 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3237 				ifs->ifs_nat_stats.ns_wilds--;
3238 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3239 			}
3240 			nat->nat_oport = sport;
3241 			nat->nat_outport = dport;
3242 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3243 			nat_tabmove(nat, ifs);
3244 			break;
3245 		}
3246 	}
3247 
3248 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3249 
3250 	return nat;
3251 }
3252 
3253 
3254 /* ------------------------------------------------------------------------ */
3255 /* Function:    nat_tabmove                                                 */
3256 /* Returns:     Nil                                                         */
3257 /* Parameters:  nat(I) - pointer to NAT structure                           */
3258 /* Write Lock:  ipf_nat                                                     */
3259 /*                                                                          */
3260 /* This function is only called for TCP/UDP NAT table entries where the     */
3261 /* original was placed in the table without hashing on the ports and we now */
3262 /* want to include hashing on port numbers.                                 */
3263 /* ------------------------------------------------------------------------ */
3264 static void nat_tabmove(nat, ifs)
3265 nat_t *nat;
3266 ipf_stack_t *ifs;
3267 {
3268 	nat_t **natp;
3269 	u_int hv;
3270 
3271 	if (nat->nat_flags & SI_CLONE)
3272 		return;
3273 
3274 	/*
3275 	 * Remove the NAT entry from the old location
3276 	 */
3277 	if (nat->nat_hnext[0])
3278 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3279 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3280 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3281 
3282 	if (nat->nat_hnext[1])
3283 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3284 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3285 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3286 
3287 	/*
3288 	 * Add into the NAT table in the new position
3289 	 */
3290 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3291 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3292 			 ifs->ifs_ipf_nattable_sz);
3293 	nat->nat_hv[0] = hv;
3294 	natp = &ifs->ifs_nat_table[0][hv];
3295 	if (*natp)
3296 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3297 	nat->nat_phnext[0] = natp;
3298 	nat->nat_hnext[0] = *natp;
3299 	*natp = nat;
3300 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3301 
3302 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3303 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3304 			 ifs->ifs_ipf_nattable_sz);
3305 	nat->nat_hv[1] = hv;
3306 	natp = &ifs->ifs_nat_table[1][hv];
3307 	if (*natp)
3308 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3309 	nat->nat_phnext[1] = natp;
3310 	nat->nat_hnext[1] = *natp;
3311 	*natp = nat;
3312 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3313 }
3314 
3315 
3316 /* ------------------------------------------------------------------------ */
3317 /* Function:    nat_outlookup                                               */
3318 /* Returns:     nat_t* - NULL == no match,                                  */
3319 /*                       else pointer to matching NAT entry                 */
3320 /* Parameters:  fin(I)   - pointer to packet information                    */
3321 /*              flags(I) - NAT flags for this packet                        */
3322 /*              p(I)     - protocol for this packet                         */
3323 /*              src(I)   - source IP address                                */
3324 /*              dst(I)   - destination IP address                           */
3325 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3326 /*                                                                          */
3327 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3328 /* destination address/port.  We use this lookup when sending a packet out, */
3329 /* we're looking for a table entry, based on the source address.            */
3330 /*                                                                          */
3331 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3332 /*                                                                          */
3333 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3334 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3335 /*                                                                          */
3336 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3337 /*            the packet is of said protocol                                */
3338 /* ------------------------------------------------------------------------ */
3339 nat_t *nat_outlookup(fin, flags, p, src, dst)
3340 fr_info_t *fin;
3341 u_int flags, p;
3342 struct in_addr src , dst;
3343 {
3344 	u_short sport, dport;
3345 	u_int sflags;
3346 	ipnat_t *ipn;
3347 	u_32_t srcip;
3348 	nat_t *nat;
3349 	int nflags;
3350 	void *ifp;
3351 	u_int hv;
3352 	ipf_stack_t *ifs = fin->fin_ifs;
3353 
3354 	ifp = fin->fin_ifp;
3355 
3356 	srcip = src.s_addr;
3357 	sflags = flags & IPN_TCPUDPICMP;
3358 	sport = 0;
3359 	dport = 0;
3360 
3361 	switch (p)
3362 	{
3363 	case IPPROTO_TCP :
3364 	case IPPROTO_UDP :
3365 		sport = htons(fin->fin_data[0]);
3366 		dport = htons(fin->fin_data[1]);
3367 		break;
3368 	case IPPROTO_ICMP :
3369 		if (flags & IPN_ICMPERR)
3370 			sport = fin->fin_data[1];
3371 		else
3372 			dport = fin->fin_data[1];
3373 		break;
3374 	default :
3375 		break;
3376 	}
3377 
3378 	if ((flags & SI_WILDP) != 0)
3379 		goto find_out_wild_ports;
3380 
3381 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3382 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3383 	nat = ifs->ifs_nat_table[0][hv];
3384 	for (; nat; nat = nat->nat_hnext[0]) {
3385 		if (nat->nat_ifps[1] != NULL) {
3386 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3387 				continue;
3388 		} else if (ifp != NULL)
3389 			nat->nat_ifps[1] = ifp;
3390 
3391 		nflags = nat->nat_flags;
3392 
3393 		if (nat->nat_inip.s_addr == srcip &&
3394 		    nat->nat_oip.s_addr == dst.s_addr &&
3395 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3396 		     || (p == nat->nat_p))) {
3397 			switch (p)
3398 			{
3399 #if 0
3400 			case IPPROTO_GRE :
3401 				if (nat->nat_call[1] != fin->fin_data[0])
3402 					continue;
3403 				break;
3404 #endif
3405 			case IPPROTO_TCP :
3406 			case IPPROTO_UDP :
3407 				if (nat->nat_oport != dport)
3408 					continue;
3409 				if (nat->nat_inport != sport)
3410 					continue;
3411 				break;
3412 			default :
3413 				break;
3414 			}
3415 
3416 			ipn = nat->nat_ptr;
3417 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3418 				if (appr_match(fin, nat) != 0)
3419 					continue;
3420 			return nat;
3421 		}
3422 	}
3423 
3424 	/*
3425 	 * So if we didn't find it but there are wildcard members in the hash
3426 	 * table, go back and look for them.  We do this search and update here
3427 	 * because it is modifying the NAT table and we want to do this only
3428 	 * for the first packet that matches.  The exception, of course, is
3429 	 * for "dummy" (FI_IGNORE) lookups.
3430 	 */
3431 find_out_wild_ports:
3432 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3433 		return NULL;
3434 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3435 		return NULL;
3436 
3437 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3438 
3439 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3440 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3441 
3442 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3443 
3444 	nat = ifs->ifs_nat_table[0][hv];
3445 	for (; nat; nat = nat->nat_hnext[0]) {
3446 		if (nat->nat_ifps[1] != NULL) {
3447 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3448 				continue;
3449 		} else if (ifp != NULL)
3450 			nat->nat_ifps[1] = ifp;
3451 
3452 		if (nat->nat_p != fin->fin_p)
3453 			continue;
3454 		if ((nat->nat_inip.s_addr != srcip) ||
3455 		    (nat->nat_oip.s_addr != dst.s_addr))
3456 			continue;
3457 
3458 		nflags = nat->nat_flags;
3459 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3460 			continue;
3461 
3462 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3463 			       NAT_OUTBOUND) == 1) {
3464 			if ((fin->fin_flx & FI_IGNORE) != 0)
3465 				break;
3466 			if ((nflags & SI_CLONE) != 0) {
3467 				nat = fr_natclone(fin, nat);
3468 				if (nat == NULL)
3469 					break;
3470 			} else {
3471 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3472 				ifs->ifs_nat_stats.ns_wilds--;
3473 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3474 			}
3475 			nat->nat_inport = sport;
3476 			nat->nat_oport = dport;
3477 			if (nat->nat_outport == 0)
3478 				nat->nat_outport = sport;
3479 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3480 			nat_tabmove(nat, ifs);
3481 			break;
3482 		}
3483 	}
3484 
3485 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3486 
3487 	return nat;
3488 }
3489 
3490 
3491 /* ------------------------------------------------------------------------ */
3492 /* Function:    nat_lookupredir                                             */
3493 /* Returns:     nat_t* - NULL == no match,                                  */
3494 /*                       else pointer to matching NAT entry                 */
3495 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3496 /*                      entry for.                                          */
3497 /*                                                                          */
3498 /* Lookup the NAT tables to search for a matching redirect                  */
3499 /* ------------------------------------------------------------------------ */
3500 nat_t *nat_lookupredir(np, ifs)
3501 natlookup_t *np;
3502 ipf_stack_t *ifs;
3503 {
3504 	fr_info_t fi;
3505 	nat_t *nat;
3506 
3507 	bzero((char *)&fi, sizeof(fi));
3508 	if (np->nl_flags & IPN_IN) {
3509 		fi.fin_data[0] = ntohs(np->nl_realport);
3510 		fi.fin_data[1] = ntohs(np->nl_outport);
3511 	} else {
3512 		fi.fin_data[0] = ntohs(np->nl_inport);
3513 		fi.fin_data[1] = ntohs(np->nl_outport);
3514 	}
3515 	if (np->nl_flags & IPN_TCP)
3516 		fi.fin_p = IPPROTO_TCP;
3517 	else if (np->nl_flags & IPN_UDP)
3518 		fi.fin_p = IPPROTO_UDP;
3519 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3520 		fi.fin_p = IPPROTO_ICMP;
3521 
3522 	fi.fin_ifs = ifs;
3523 	/*
3524 	 * We can do two sorts of lookups:
3525 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3526 	 * - default: we have the `in' and `out' address, look for `real'.
3527 	 */
3528 	if (np->nl_flags & IPN_IN) {
3529 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3530 					np->nl_realip, np->nl_outip))) {
3531 			np->nl_inip = nat->nat_inip;
3532 			np->nl_inport = nat->nat_inport;
3533 		}
3534 	} else {
3535 		/*
3536 		 * If nl_inip is non null, this is a lookup based on the real
3537 		 * ip address. Else, we use the fake.
3538 		 */
3539 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3540 					 np->nl_inip, np->nl_outip))) {
3541 
3542 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3543 				fr_info_t fin;
3544 				bzero((char *)&fin, sizeof(fin));
3545 				fin.fin_p = nat->nat_p;
3546 				fin.fin_data[0] = ntohs(nat->nat_outport);
3547 				fin.fin_data[1] = ntohs(nat->nat_oport);
3548 				fin.fin_ifs = ifs;
3549 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3550 						 nat->nat_outip,
3551 						 nat->nat_oip) != NULL) {
3552 					np->nl_flags &= ~IPN_FINDFORWARD;
3553 				}
3554 			}
3555 
3556 			np->nl_realip = nat->nat_outip;
3557 			np->nl_realport = nat->nat_outport;
3558 		}
3559  	}
3560 
3561 	return nat;
3562 }
3563 
3564 
3565 /* ------------------------------------------------------------------------ */
3566 /* Function:    nat_match                                                   */
3567 /* Returns:     int - 0 == no match, 1 == match                             */
3568 /* Parameters:  fin(I)   - pointer to packet information                    */
3569 /*              np(I)    - pointer to NAT rule                              */
3570 /*                                                                          */
3571 /* Pull the matching of a packet against a NAT rule out of that complex     */
3572 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3573 /* ------------------------------------------------------------------------ */
3574 static int nat_match(fin, np)
3575 fr_info_t *fin;
3576 ipnat_t *np;
3577 {
3578 	frtuc_t *ft;
3579 
3580 	if (fin->fin_v != 4)
3581 		return 0;
3582 
3583 	if (np->in_p && fin->fin_p != np->in_p)
3584 		return 0;
3585 
3586 	if (fin->fin_out) {
3587 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3588 			return 0;
3589 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3590 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3591 			return 0;
3592 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3593 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3594 			return 0;
3595 	} else {
3596 		if (!(np->in_redir & NAT_REDIRECT))
3597 			return 0;
3598 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3599 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3600 			return 0;
3601 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3602 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3603 			return 0;
3604 	}
3605 
3606 	ft = &np->in_tuc;
3607 	if (!(fin->fin_flx & FI_TCPUDP) ||
3608 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3609 		if (ft->ftu_scmp || ft->ftu_dcmp)
3610 			return 0;
3611 		return 1;
3612 	}
3613 
3614 	return fr_tcpudpchk(fin, ft);
3615 }
3616 
3617 
3618 /* ------------------------------------------------------------------------ */
3619 /* Function:    nat_update                                                  */
3620 /* Returns:     Nil                                                         */
3621 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3622 /*              np(I)     - pointer to NAT rule                             */
3623 /*                                                                          */
3624 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3625 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3626 /* ------------------------------------------------------------------------ */
3627 void nat_update(fin, nat, np)
3628 fr_info_t *fin;
3629 nat_t *nat;
3630 ipnat_t *np;
3631 {
3632 	ipftq_t *ifq, *ifq2;
3633 	ipftqent_t *tqe;
3634 	ipf_stack_t *ifs = fin->fin_ifs;
3635 
3636 	MUTEX_ENTER(&nat->nat_lock);
3637 	tqe = &nat->nat_tqe;
3638 	ifq = tqe->tqe_ifq;
3639 
3640 	/*
3641 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3642 	 * TCP, however, if it is TCP and there is no rule timeout set,
3643 	 * then do not update the timeout here.
3644 	 */
3645 	if (np != NULL)
3646 		ifq2 = np->in_tqehead[fin->fin_rev];
3647 	else
3648 		ifq2 = NULL;
3649 
3650 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3651 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3652 	} else {
3653 		if (ifq2 == NULL) {
3654 			if (nat->nat_p == IPPROTO_UDP)
3655 				ifq2 = &ifs->ifs_nat_udptq;
3656 			else if (nat->nat_p == IPPROTO_ICMP)
3657 				ifq2 = &ifs->ifs_nat_icmptq;
3658 			else
3659 				ifq2 = &ifs->ifs_nat_iptq;
3660 		}
3661 
3662 		fr_movequeue(tqe, ifq, ifq2, ifs);
3663 	}
3664 	MUTEX_EXIT(&nat->nat_lock);
3665 }
3666 
3667 
3668 /* ------------------------------------------------------------------------ */
3669 /* Function:    fr_checknatout                                              */
3670 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3671 /*                     0 == no packet translation occurred,                 */
3672 /*                     1 == packet was successfully translated.             */
3673 /* Parameters:  fin(I)   - pointer to packet information                    */
3674 /*              passp(I) - pointer to filtering result flags                */
3675 /*                                                                          */
3676 /* Check to see if an outcoming packet should be changed.  ICMP packets are */
3677 /* first checked to see if they match an existing entry (if an error),      */
3678 /* otherwise a search of the current NAT table is made.  If neither results */
3679 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3680 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3681 /* packet header(s) as required.                                            */
3682 /* ------------------------------------------------------------------------ */
3683 int fr_checknatout(fin, passp)
3684 fr_info_t *fin;
3685 u_32_t *passp;
3686 {
3687 	struct ifnet *ifp, *sifp;
3688 	icmphdr_t *icmp = NULL;
3689 	tcphdr_t *tcp = NULL;
3690 	int rval, natfailed;
3691 	ipnat_t *np = NULL;
3692 	u_int nflags = 0;
3693 	u_32_t ipa, iph;
3694 	int natadd = 1;
3695 	frentry_t *fr;
3696 	nat_t *nat;
3697 	ipf_stack_t *ifs = fin->fin_ifs;
3698 
3699 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3700 		return 0;
3701 
3702 	natfailed = 0;
3703 	fr = fin->fin_fr;
3704 	sifp = fin->fin_ifp;
3705 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3706 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3707 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3708 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3709 	ifp = fin->fin_ifp;
3710 
3711 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3712 		switch (fin->fin_p)
3713 		{
3714 		case IPPROTO_TCP :
3715 			nflags = IPN_TCP;
3716 			break;
3717 		case IPPROTO_UDP :
3718 			nflags = IPN_UDP;
3719 			break;
3720 		case IPPROTO_ICMP :
3721 			icmp = fin->fin_dp;
3722 
3723 			/*
3724 			 * This is an incoming packet, so the destination is
3725 			 * the icmp_id and the source port equals 0
3726 			 */
3727 			if (nat_icmpquerytype4(icmp->icmp_type))
3728 				nflags = IPN_ICMPQUERY;
3729 			break;
3730 		default :
3731 			break;
3732 		}
3733 
3734 		if ((nflags & IPN_TCPUDP))
3735 			tcp = fin->fin_dp;
3736 	}
3737 
3738 	ipa = fin->fin_saddr;
3739 
3740 	READ_ENTER(&ifs->ifs_ipf_nat);
3741 
3742 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3743 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3744 		/*EMPTY*/;
3745 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3746 		natadd = 0;
3747 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3748 				      fin->fin_src, fin->fin_dst))) {
3749 		nflags = nat->nat_flags;
3750 	} else {
3751 		u_32_t hv, msk, nmsk;
3752 
3753 		/*
3754 		 * If there is no current entry in the nat table for this IP#,
3755 		 * create one for it (if there is a matching rule).
3756 		 */
3757 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3758 		msk = 0xffffffff;
3759 		nmsk = ifs->ifs_nat_masks;
3760 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3761 maskloop:
3762 		iph = ipa & htonl(msk);
3763 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3764 		for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext)
3765 		{
3766 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3767 				continue;
3768 			if (np->in_v != fin->fin_v)
3769 				continue;
3770 			if (np->in_p && (np->in_p != fin->fin_p))
3771 				continue;
3772 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3773 				continue;
3774 			if (np->in_flags & IPN_FILTER) {
3775 				if (!nat_match(fin, np))
3776 					continue;
3777 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3778 				continue;
3779 
3780 			if ((fr != NULL) &&
3781 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3782 				continue;
3783 
3784 			if (*np->in_plabel != '\0') {
3785 				if (((np->in_flags & IPN_FILTER) == 0) &&
3786 				    (np->in_dport != tcp->th_dport))
3787 					continue;
3788 				if (appr_ok(fin, tcp, np) == 0)
3789 					continue;
3790 			}
3791 
3792 			if ((nat = nat_new(fin, np, NULL, nflags,
3793 					   NAT_OUTBOUND))) {
3794 				np->in_hits++;
3795 				break;
3796 			} else
3797 				natfailed = -1;
3798 		}
3799 		if ((np == NULL) && (nmsk != 0)) {
3800 			while (nmsk) {
3801 				msk <<= 1;
3802 				if (nmsk & 0x80000000)
3803 					break;
3804 				nmsk <<= 1;
3805 			}
3806 			if (nmsk != 0) {
3807 				nmsk <<= 1;
3808 				goto maskloop;
3809 			}
3810 		}
3811 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3812 	}
3813 
3814 	if (nat != NULL) {
3815 		rval = fr_natout(fin, nat, natadd, nflags);
3816 		if (rval == 1) {
3817 			MUTEX_ENTER(&nat->nat_lock);
3818 			nat->nat_ref++;
3819 			MUTEX_EXIT(&nat->nat_lock);
3820 			nat->nat_touched = ifs->ifs_fr_ticks;
3821 			fin->fin_nat = nat;
3822 		}
3823 	} else
3824 		rval = natfailed;
3825 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3826 
3827 	if (rval == -1) {
3828 		if (passp != NULL)
3829 			*passp = FR_BLOCK;
3830 		fin->fin_flx |= FI_BADNAT;
3831 	}
3832 	fin->fin_ifp = sifp;
3833 	return rval;
3834 }
3835 
3836 /* ------------------------------------------------------------------------ */
3837 /* Function:    fr_natout                                                   */
3838 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3839 /*                     1 == packet was successfully translated.             */
3840 /* Parameters:  fin(I)    - pointer to packet information                   */
3841 /*              nat(I)    - pointer to NAT structure                        */
3842 /*              natadd(I) - flag indicating if it is safe to add frag cache */
3843 /*              nflags(I) - NAT flags set for this packet                   */
3844 /*                                                                          */
3845 /* Translate a packet coming "out" on an interface.                         */
3846 /* ------------------------------------------------------------------------ */
3847 int fr_natout(fin, nat, natadd, nflags)
3848 fr_info_t *fin;
3849 nat_t *nat;
3850 int natadd;
3851 u_32_t nflags;
3852 {
3853 	icmphdr_t *icmp;
3854 	u_short *csump;
3855 	u_32_t sumd;
3856 	tcphdr_t *tcp;
3857 	ipnat_t *np;
3858 	int i;
3859 	ipf_stack_t *ifs = fin->fin_ifs;
3860 
3861 #if SOLARIS && defined(_KERNEL)
3862 	net_data_t net_data_p;
3863 	if (fin->fin_v == 4)
3864 		net_data_p = ifs->ifs_ipf_ipv4;
3865 	else
3866 		net_data_p = ifs->ifs_ipf_ipv6;
3867 #endif
3868 
3869 	tcp = NULL;
3870 	icmp = NULL;
3871 	csump = NULL;
3872 	np = nat->nat_ptr;
3873 
3874 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
3875 		(void) fr_nat_newfrag(fin, 0, nat);
3876 
3877 	MUTEX_ENTER(&nat->nat_lock);
3878 	nat->nat_bytes[1] += fin->fin_plen;
3879 	nat->nat_pkts[1]++;
3880 	MUTEX_EXIT(&nat->nat_lock);
3881 
3882 	/*
3883 	 * Fix up checksums, not by recalculating them, but
3884 	 * simply computing adjustments.
3885 	 * This is only done for STREAMS based IP implementations where the
3886 	 * checksum has already been calculated by IP.  In all other cases,
3887 	 * IPFilter is called before the checksum needs calculating so there
3888 	 * is no call to modify whatever is in the header now.
3889 	 */
3890 	ASSERT(fin->fin_m != NULL);
3891 	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
3892 		if (nflags == IPN_ICMPERR) {
3893 			u_32_t s1, s2;
3894 
3895 			s1 = LONG_SUM(ntohl(fin->fin_saddr));
3896 			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
3897 			CALC_SUMD(s1, s2, sumd);
3898 
3899 			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
3900 		}
3901 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
3902     defined(linux) || defined(BRIDGE_IPF)
3903 		else {
3904 			/*
3905 			 * Strictly speaking, this isn't necessary on BSD
3906 			 * kernels because they do checksum calculation after
3907 			 * this code has run BUT if ipfilter is being used
3908 			 * to do NAT as a bridge, that code doesn't exist.
3909 			 */
3910 			if (nat->nat_dir == NAT_OUTBOUND)
3911 				fix_outcksum(&fin->fin_ip->ip_sum,
3912 					    nat->nat_ipsumd);
3913 			else
3914 				fix_incksum(&fin->fin_ip->ip_sum,
3915 				 	   nat->nat_ipsumd);
3916 		}
3917 #endif
3918 	}
3919 
3920 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3921 		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
3922 			tcp = fin->fin_dp;
3923 
3924 			tcp->th_sport = nat->nat_outport;
3925 			fin->fin_data[0] = ntohs(nat->nat_outport);
3926 		}
3927 
3928 		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
3929 			icmp = fin->fin_dp;
3930 			icmp->icmp_id = nat->nat_outport;
3931 		}
3932 
3933 		csump = nat_proto(fin, nat, nflags);
3934 	}
3935 
3936 	fin->fin_ip->ip_src = nat->nat_outip;
3937 
3938 	nat_update(fin, nat, np);
3939 
3940 	/*
3941 	 * The above comments do not hold for layer 4 (or higher) checksums...
3942 	 */
3943 	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
3944 		if (nflags & IPN_TCPUDP &&
3945 	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
3946 			sumd = nat->nat_sumd[1];
3947 		else
3948 			sumd = nat->nat_sumd[0];
3949 
3950 		if (nat->nat_dir == NAT_OUTBOUND)
3951 			fix_outcksum(csump, sumd);
3952 		else
3953 			fix_incksum(csump, sumd);
3954 	}
3955 #ifdef	IPFILTER_SYNC
3956 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
3957 #endif
3958 	/* ------------------------------------------------------------- */
3959 	/* A few quick notes:						 */
3960 	/*	Following are test conditions prior to calling the 	 */
3961 	/*	appr_check routine.					 */
3962 	/*								 */
3963 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
3964 	/*	with a redirect rule, we attempt to match the packet's	 */
3965 	/*	source port against in_dport, otherwise	we'd compare the */
3966 	/*	packet's destination.			 		 */
3967 	/* ------------------------------------------------------------- */
3968 	if ((np != NULL) && (np->in_apr != NULL)) {
3969 		i = appr_check(fin, nat);
3970 		if (i == 0)
3971 			i = 1;
3972 	} else
3973 		i = 1;
3974 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]);
3975 	fin->fin_flx |= FI_NATED;
3976 	return i;
3977 }
3978 
3979 
3980 /* ------------------------------------------------------------------------ */
3981 /* Function:    fr_checknatin                                               */
3982 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3983 /*                     0 == no packet translation occurred,                 */
3984 /*                     1 == packet was successfully translated.             */
3985 /* Parameters:  fin(I)   - pointer to packet information                    */
3986 /*              passp(I) - pointer to filtering result flags                */
3987 /*                                                                          */
3988 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
3989 /* first checked to see if they match an existing entry (if an error),      */
3990 /* otherwise a search of the current NAT table is made.  If neither results */
3991 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3992 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3993 /* packet header(s) as required.                                            */
3994 /* ------------------------------------------------------------------------ */
3995 int fr_checknatin(fin, passp)
3996 fr_info_t *fin;
3997 u_32_t *passp;
3998 {
3999 	u_int nflags, natadd;
4000 	int rval, natfailed;
4001 	struct ifnet *ifp;
4002 	struct in_addr in;
4003 	icmphdr_t *icmp;
4004 	tcphdr_t *tcp;
4005 	u_short dport;
4006 	ipnat_t *np;
4007 	nat_t *nat;
4008 	u_32_t iph;
4009 	ipf_stack_t *ifs = fin->fin_ifs;
4010 
4011 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
4012 		return 0;
4013 
4014 	tcp = NULL;
4015 	icmp = NULL;
4016 	dport = 0;
4017 	natadd = 1;
4018 	nflags = 0;
4019 	natfailed = 0;
4020 	ifp = fin->fin_ifp;
4021 
4022 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4023 		switch (fin->fin_p)
4024 		{
4025 		case IPPROTO_TCP :
4026 			nflags = IPN_TCP;
4027 			break;
4028 		case IPPROTO_UDP :
4029 			nflags = IPN_UDP;
4030 			break;
4031 		case IPPROTO_ICMP :
4032 			icmp = fin->fin_dp;
4033 
4034 			/*
4035 			 * This is an incoming packet, so the destination is
4036 			 * the icmp_id and the source port equals 0
4037 			 */
4038 			if (nat_icmpquerytype4(icmp->icmp_type)) {
4039 				nflags = IPN_ICMPQUERY;
4040 				dport = icmp->icmp_id;
4041 			} break;
4042 		default :
4043 			break;
4044 		}
4045 
4046 		if ((nflags & IPN_TCPUDP)) {
4047 			tcp = fin->fin_dp;
4048 			dport = tcp->th_dport;
4049 		}
4050 	}
4051 
4052 	in = fin->fin_dst;
4053 
4054 	READ_ENTER(&ifs->ifs_ipf_nat);
4055 
4056 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4057 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4058 		/*EMPTY*/;
4059 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4060 		natadd = 0;
4061 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4062 				     fin->fin_src, in))) {
4063 		nflags = nat->nat_flags;
4064 	} else {
4065 		u_32_t hv, msk, rmsk;
4066 
4067 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4068 		rmsk = ifs->ifs_rdr_masks;
4069 		msk = 0xffffffff;
4070 		WRITE_ENTER(&ifs->ifs_ipf_nat);
4071 		/*
4072 		 * If there is no current entry in the nat table for this IP#,
4073 		 * create one for it (if there is a matching rule).
4074 		 */
4075 maskloop:
4076 		iph = in.s_addr & htonl(msk);
4077 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4078 		for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) {
4079 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4080 				continue;
4081 			if (np->in_v != fin->fin_v)
4082 				continue;
4083 			if (np->in_p && (np->in_p != fin->fin_p))
4084 				continue;
4085 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4086 				continue;
4087 			if (np->in_flags & IPN_FILTER) {
4088 				if (!nat_match(fin, np))
4089 					continue;
4090 			} else {
4091 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4092 					continue;
4093 				if (np->in_pmin &&
4094 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4095 				     (ntohs(dport) < ntohs(np->in_pmin))))
4096 					continue;
4097 			}
4098 
4099 			if (*np->in_plabel != '\0') {
4100 				if (!appr_ok(fin, tcp, np)) {
4101 					continue;
4102 				}
4103 			}
4104 
4105 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4106 			if (nat != NULL) {
4107 				np->in_hits++;
4108 				break;
4109 			} else
4110 				natfailed = -1;
4111 		}
4112 
4113 		if ((np == NULL) && (rmsk != 0)) {
4114 			while (rmsk) {
4115 				msk <<= 1;
4116 				if (rmsk & 0x80000000)
4117 					break;
4118 				rmsk <<= 1;
4119 			}
4120 			if (rmsk != 0) {
4121 				rmsk <<= 1;
4122 				goto maskloop;
4123 			}
4124 		}
4125 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4126 	}
4127 	if (nat != NULL) {
4128 		rval = fr_natin(fin, nat, natadd, nflags);
4129 		if (rval == 1) {
4130 			MUTEX_ENTER(&nat->nat_lock);
4131 			nat->nat_ref++;
4132 			MUTEX_EXIT(&nat->nat_lock);
4133 			nat->nat_touched = ifs->ifs_fr_ticks;
4134 			fin->fin_nat = nat;
4135 			fin->fin_state = nat->nat_state;
4136 		}
4137 	} else
4138 		rval = natfailed;
4139 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4140 
4141 	if (rval == -1) {
4142 		if (passp != NULL)
4143 			*passp = FR_BLOCK;
4144 		fin->fin_flx |= FI_BADNAT;
4145 	}
4146 	return rval;
4147 }
4148 
4149 
4150 /* ------------------------------------------------------------------------ */
4151 /* Function:    fr_natin                                                    */
4152 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
4153 /*                     1 == packet was successfully translated.             */
4154 /* Parameters:  fin(I)    - pointer to packet information                   */
4155 /*              nat(I)    - pointer to NAT structure                        */
4156 /*              natadd(I) - flag indicating if it is safe to add frag cache */
4157 /*              nflags(I) - NAT flags set for this packet                   */
4158 /* Locks Held:  ipf_nat (READ)                                              */
4159 /*                                                                          */
4160 /* Translate a packet coming "in" on an interface.                          */
4161 /* ------------------------------------------------------------------------ */
4162 int fr_natin(fin, nat, natadd, nflags)
4163 fr_info_t *fin;
4164 nat_t *nat;
4165 int natadd;
4166 u_32_t nflags;
4167 {
4168 	icmphdr_t *icmp;
4169 	u_short *csump;
4170 	tcphdr_t *tcp;
4171 	ipnat_t *np;
4172 	int i;
4173 	ipf_stack_t *ifs = fin->fin_ifs;
4174 
4175 #if SOLARIS && defined(_KERNEL)
4176 	net_data_t net_data_p;
4177 	if (fin->fin_v == 4)
4178 		net_data_p = ifs->ifs_ipf_ipv4;
4179 	else
4180 		net_data_p = ifs->ifs_ipf_ipv6;
4181 #endif
4182 
4183 	tcp = NULL;
4184 	csump = NULL;
4185 	np = nat->nat_ptr;
4186 	fin->fin_fr = nat->nat_fr;
4187 
4188 	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4189 		(void) fr_nat_newfrag(fin, 0, nat);
4190 
4191 	if (np != NULL) {
4192 
4193 	/* ------------------------------------------------------------- */
4194 	/* A few quick notes:						 */
4195 	/*	Following are test conditions prior to calling the 	 */
4196 	/*	appr_check routine.					 */
4197 	/*								 */
4198 	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4199 	/*	with a map rule, we attempt to match the packet's	 */
4200 	/*	source port against in_dport, otherwise	we'd compare the */
4201 	/*	packet's destination.			 		 */
4202 	/* ------------------------------------------------------------- */
4203 		if (np->in_apr != NULL) {
4204 			i = appr_check(fin, nat);
4205 			if (i == -1) {
4206 				return -1;
4207 			}
4208 		}
4209 	}
4210 
4211 #ifdef	IPFILTER_SYNC
4212 	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4213 #endif
4214 
4215 	MUTEX_ENTER(&nat->nat_lock);
4216 	nat->nat_bytes[0] += fin->fin_plen;
4217 	nat->nat_pkts[0]++;
4218 	MUTEX_EXIT(&nat->nat_lock);
4219 
4220 	fin->fin_ip->ip_dst = nat->nat_inip;
4221 	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4222 	if (nflags & IPN_TCPUDP)
4223 		tcp = fin->fin_dp;
4224 
4225 	/*
4226 	 * Fix up checksums, not by recalculating them, but
4227 	 * simply computing adjustments.
4228 	 * Why only do this for some platforms on inbound packets ?
4229 	 * Because for those that it is done, IP processing is yet to happen
4230 	 * and so the IPv4 header checksum has not yet been evaluated.
4231 	 * Perhaps it should always be done for the benefit of things like
4232 	 * fast forwarding (so that it doesn't need to be recomputed) but with
4233 	 * header checksum offloading, perhaps it is a moot point.
4234 	 */
4235 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4236      defined(__osf__) || defined(linux)
4237 	if (nat->nat_dir == NAT_OUTBOUND)
4238 		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4239 	else
4240 		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4241 #endif
4242 
4243 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4244 		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4245 			tcp->th_dport = nat->nat_inport;
4246 			fin->fin_data[1] = ntohs(nat->nat_inport);
4247 		}
4248 
4249 
4250 		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4251 			icmp = fin->fin_dp;
4252 
4253 			icmp->icmp_id = nat->nat_inport;
4254 		}
4255 
4256 		csump = nat_proto(fin, nat, nflags);
4257 	}
4258 
4259 	nat_update(fin, nat, np);
4260 
4261 	/*
4262 	 * In case they are being forwarded, inbound packets always need to have
4263 	 * their checksum adjusted even if hardware checksum validation said OK.
4264 	 */
4265 	if (csump != NULL) {
4266 		if (nat->nat_dir == NAT_OUTBOUND)
4267 			fix_incksum(csump, nat->nat_sumd[0]);
4268 		else
4269 			fix_outcksum(csump, nat->nat_sumd[0]);
4270 	}
4271 
4272 #if SOLARIS && defined(_KERNEL)
4273 	if (nflags & IPN_TCPUDP &&
4274 	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4275 		/*
4276 		 * Need to adjust the partial checksum result stored in
4277 		 * db_cksum16, which will be used for validation in IP.
4278 		 * See IP_CKSUM_RECV().
4279 		 * Adjustment data should be the inverse of the IP address
4280 		 * changes, because db_cksum16 is supposed to be the complement
4281 		 * of the pesudo header.
4282 		 */
4283 		csump = &fin->fin_m->b_datap->db_cksum16;
4284 		if (nat->nat_dir == NAT_OUTBOUND)
4285 			fix_outcksum(csump, nat->nat_sumd[1]);
4286 		else
4287 			fix_incksum(csump, nat->nat_sumd[1]);
4288 	}
4289 #endif
4290 
4291 	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]);
4292 	fin->fin_flx |= FI_NATED;
4293 	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4294 		fin->fin_nattag = &np->in_tag;
4295 	return 1;
4296 }
4297 
4298 
4299 /* ------------------------------------------------------------------------ */
4300 /* Function:    nat_proto                                                   */
4301 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4302 /*                         NULL if the transport protocol is not recognised */
4303 /*                         as needing a checksum update.                    */
4304 /* Parameters:  fin(I)    - pointer to packet information                   */
4305 /*              nat(I)    - pointer to NAT structure                        */
4306 /*              nflags(I) - NAT flags set for this packet                   */
4307 /*                                                                          */
4308 /* Return the pointer to the checksum field for each protocol so understood.*/
4309 /* If support for making other changes to a protocol header is required,    */
4310 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4311 /* TCP down to a specific value, then do it from here.                      */
4312 /* ------------------------------------------------------------------------ */
4313 u_short *nat_proto(fin, nat, nflags)
4314 fr_info_t *fin;
4315 nat_t *nat;
4316 u_int nflags;
4317 {
4318 	icmphdr_t *icmp;
4319 	u_short *csump;
4320 	tcphdr_t *tcp;
4321 	udphdr_t *udp;
4322 
4323 	csump = NULL;
4324 	if (fin->fin_out == 0) {
4325 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4326 	} else {
4327 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4328 	}
4329 
4330 	switch (fin->fin_p)
4331 	{
4332 	case IPPROTO_TCP :
4333 		tcp = fin->fin_dp;
4334 
4335 		csump = &tcp->th_sum;
4336 
4337 		/*
4338 		 * Do a MSS CLAMPING on a SYN packet,
4339 		 * only deal IPv4 for now.
4340 		 */
4341 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4342 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4343 
4344 		break;
4345 
4346 	case IPPROTO_UDP :
4347 		udp = fin->fin_dp;
4348 
4349 		if (udp->uh_sum)
4350 			csump = &udp->uh_sum;
4351 		break;
4352 
4353 	case IPPROTO_ICMP :
4354 		icmp = fin->fin_dp;
4355 
4356 		if ((nflags & IPN_ICMPQUERY) != 0) {
4357 			if (icmp->icmp_cksum != 0)
4358 				csump = &icmp->icmp_cksum;
4359 		}
4360 		break;
4361 	}
4362 	return csump;
4363 }
4364 
4365 
4366 /* ------------------------------------------------------------------------ */
4367 /* Function:    fr_natunload                                                */
4368 /* Returns:     Nil                                                         */
4369 /* Parameters:  Nil                                                         */
4370 /*                                                                          */
4371 /* Free all memory used by NAT structures allocated at runtime.             */
4372 /* ------------------------------------------------------------------------ */
4373 void fr_natunload(ifs)
4374 ipf_stack_t *ifs;
4375 {
4376 	ipftq_t *ifq, *ifqnext;
4377 
4378 	(void) nat_clearlist(ifs);
4379 	(void) nat_flushtable(ifs);
4380 
4381 	/*
4382 	 * Proxy timeout queues are not cleaned here because although they
4383 	 * exist on the NAT list, appr_unload is called after fr_natunload
4384 	 * and the proxies actually are responsible for them being created.
4385 	 * Should the proxy timeouts have their own list?  There's no real
4386 	 * justification as this is the only complication.
4387 	 */
4388 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4389 		ifqnext = ifq->ifq_next;
4390 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4391 		    (fr_deletetimeoutqueue(ifq) == 0))
4392 			fr_freetimeoutqueue(ifq, ifs);
4393 	}
4394 
4395 	if (ifs->ifs_nat_table[0] != NULL) {
4396 		KFREES(ifs->ifs_nat_table[0],
4397 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4398 		ifs->ifs_nat_table[0] = NULL;
4399 	}
4400 	if (ifs->ifs_nat_table[1] != NULL) {
4401 		KFREES(ifs->ifs_nat_table[1],
4402 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4403 		ifs->ifs_nat_table[1] = NULL;
4404 	}
4405 	if (ifs->ifs_nat_rules != NULL) {
4406 		KFREES(ifs->ifs_nat_rules,
4407 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4408 		ifs->ifs_nat_rules = NULL;
4409 	}
4410 	if (ifs->ifs_rdr_rules != NULL) {
4411 		KFREES(ifs->ifs_rdr_rules,
4412 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4413 		ifs->ifs_rdr_rules = NULL;
4414 	}
4415 	if (ifs->ifs_maptable != NULL) {
4416 		KFREES(ifs->ifs_maptable,
4417 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4418 		ifs->ifs_maptable = NULL;
4419 	}
4420 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4421 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4422 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4423 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4424 	}
4425 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4426 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4427 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4428 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4429 	}
4430 
4431 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4432 		ifs->ifs_fr_nat_maxbucket = 0;
4433 
4434 	if (ifs->ifs_fr_nat_init == 1) {
4435 		ifs->ifs_fr_nat_init = 0;
4436 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4437 
4438 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4439 		RW_DESTROY(&ifs->ifs_ipf_nat);
4440 
4441 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4442 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4443 
4444 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4445 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4446 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4447 	}
4448 }
4449 
4450 
4451 /* ------------------------------------------------------------------------ */
4452 /* Function:    fr_natexpire                                                */
4453 /* Returns:     Nil                                                         */
4454 /* Parameters:  Nil                                                         */
4455 /*                                                                          */
4456 /* Check all of the timeout queues for entries at the top which need to be  */
4457 /* expired.                                                                 */
4458 /* ------------------------------------------------------------------------ */
4459 void fr_natexpire(ifs)
4460 ipf_stack_t *ifs;
4461 {
4462 	ipftq_t *ifq, *ifqnext;
4463 	ipftqent_t *tqe, *tqn;
4464 	int i;
4465 	SPL_INT(s);
4466 
4467 	SPL_NET(s);
4468 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4469 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4470 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4471 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4472 				break;
4473 			tqn = tqe->tqe_next;
4474 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4475 		}
4476 	}
4477 
4478 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4479 		ifqnext = ifq->ifq_next;
4480 
4481 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4482 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4483 				break;
4484 			tqn = tqe->tqe_next;
4485 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4486 		}
4487 	}
4488 
4489 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4490 		ifqnext = ifq->ifq_next;
4491 
4492 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4493 		    (ifq->ifq_ref == 0)) {
4494 			fr_freetimeoutqueue(ifq, ifs);
4495 		}
4496 	}
4497 
4498 	if (ifs->ifs_nat_doflush != 0) {
4499 		(void) nat_extraflush(2, ifs);
4500 		ifs->ifs_nat_doflush = 0;
4501 	}
4502 
4503 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4504 	SPL_X(s);
4505 }
4506 
4507 
4508 /* ------------------------------------------------------------------------ */
4509 /* Function:    fr_nataddrsync                                              */
4510 /* Returns:     Nil                                                         */
4511 /* Parameters:  ifp(I) -  pointer to network interface                      */
4512 /*              addr(I) - pointer to new network address                    */
4513 /*                                                                          */
4514 /* Walk through all of the currently active NAT sessions, looking for those */
4515 /* which need to have their translated address updated (where the interface */
4516 /* matches the one passed in) and change it, recalculating the checksum sum */
4517 /* difference too.                                                          */
4518 /* ------------------------------------------------------------------------ */
4519 void fr_nataddrsync(ifp, addr, ifs)
4520 void *ifp;
4521 struct in_addr *addr;
4522 ipf_stack_t *ifs;
4523 {
4524 	u_32_t sum1, sum2, sumd;
4525 	nat_t *nat;
4526 	ipnat_t *np;
4527 	SPL_INT(s);
4528 
4529 	if (ifs->ifs_fr_running <= 0)
4530 		return;
4531 
4532 	SPL_NET(s);
4533 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4534 
4535 	if (ifs->ifs_fr_running <= 0) {
4536 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4537 		return;
4538 	}
4539 
4540 	/*
4541 	 * Change IP addresses for NAT sessions for any protocol except TCP
4542 	 * since it will break the TCP connection anyway.  The only rules
4543 	 * which will get changed are those which are "map ... -> 0/32",
4544 	 * where the rule specifies the address is taken from the interface.
4545 	 */
4546 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4547 		if (addr != NULL) {
4548 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4549 			    ((nat->nat_flags & IPN_TCP) != 0))
4550 				continue;
4551 			if (((np = nat->nat_ptr) == NULL) ||
4552 			    (np->in_nip || (np->in_outmsk != 0xffffffff)))
4553 				continue;
4554 
4555 			/*
4556 			 * Change the map-to address to be the same as the
4557 			 * new one.
4558 			 */
4559 			sum1 = nat->nat_outip.s_addr;
4560 			nat->nat_outip = *addr;
4561 			sum2 = nat->nat_outip.s_addr;
4562 
4563 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4564 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
4565 		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
4566 			struct in_addr in;
4567 
4568 			/*
4569 			 * Change the map-to address to be the same as the
4570 			 * new one.
4571 			 */
4572 			sum1 = nat->nat_outip.s_addr;
4573 			if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4574 				       &in, NULL, ifs) != -1)
4575 				nat->nat_outip = in;
4576 			sum2 = nat->nat_outip.s_addr;
4577 		} else {
4578 			continue;
4579 		}
4580 
4581 		if (sum1 == sum2)
4582 			continue;
4583 		/*
4584 		 * Readjust the checksum adjustment to take into
4585 		 * account the new IP#.
4586 		 */
4587 		CALC_SUMD(sum1, sum2, sumd);
4588 		/* XXX - dont change for TCP when solaris does
4589 		 * hardware checksumming.
4590 		 */
4591 		sumd += nat->nat_sumd[0];
4592 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4593 		nat->nat_sumd[1] = nat->nat_sumd[0];
4594 	}
4595 
4596 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4597 	SPL_X(s);
4598 }
4599 
4600 
4601 /* ------------------------------------------------------------------------ */
4602 /* Function:    fr_natifpsync                                               */
4603 /* Returns:     Nil                                                         */
4604 /* Parameters:  action(I) - how we are syncing                              */
4605 /*              ifp(I)    - pointer to network interface                    */
4606 /*              name(I)   - name of interface to sync to                    */
4607 /*                                                                          */
4608 /* This function is used to resync the mapping of interface names and their */
4609 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4610 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4611 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4612 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4613 /* there is no longer any interface associated with it.                     */
4614 /* ------------------------------------------------------------------------ */
4615 void fr_natifpsync(action, ifp, name, ifs)
4616 int action;
4617 void *ifp;
4618 char *name;
4619 ipf_stack_t *ifs;
4620 {
4621 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4622 	int s;
4623 #endif
4624 	nat_t *nat;
4625 	ipnat_t *n;
4626 
4627 	if (ifs->ifs_fr_running <= 0)
4628 		return;
4629 
4630 	SPL_NET(s);
4631 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4632 
4633 	if (ifs->ifs_fr_running <= 0) {
4634 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4635 		return;
4636 	}
4637 
4638 	switch (action)
4639 	{
4640 	case IPFSYNC_RESYNC :
4641 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4642 			if ((ifp == nat->nat_ifps[0]) ||
4643 			    (nat->nat_ifps[0] == (void *)-1)) {
4644 				nat->nat_ifps[0] =
4645 				    fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
4646 			}
4647 
4648 			if ((ifp == nat->nat_ifps[1]) ||
4649 			    (nat->nat_ifps[1] == (void *)-1)) {
4650 				nat->nat_ifps[1] =
4651 				    fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
4652 			}
4653 		}
4654 
4655 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4656 			if (n->in_ifps[0] == ifp ||
4657 			    n->in_ifps[0] == (void *)-1) {
4658 				n->in_ifps[0] =
4659 				    fr_resolvenic(n->in_ifnames[0], 4, ifs);
4660 			}
4661 			if (n->in_ifps[1] == ifp ||
4662 			    n->in_ifps[1] == (void *)-1) {
4663 				n->in_ifps[1] =
4664 				    fr_resolvenic(n->in_ifnames[1], 4, ifs);
4665 			}
4666 		}
4667 		break;
4668 	case IPFSYNC_NEWIFP :
4669 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4670 			if (!strncmp(name, nat->nat_ifnames[0],
4671 				     sizeof(nat->nat_ifnames[0])))
4672 				nat->nat_ifps[0] = ifp;
4673 			if (!strncmp(name, nat->nat_ifnames[1],
4674 				     sizeof(nat->nat_ifnames[1])))
4675 				nat->nat_ifps[1] = ifp;
4676 		}
4677 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4678 			if (!strncmp(name, n->in_ifnames[0],
4679 				     sizeof(n->in_ifnames[0])))
4680 				n->in_ifps[0] = ifp;
4681 			if (!strncmp(name, n->in_ifnames[1],
4682 				     sizeof(n->in_ifnames[1])))
4683 				n->in_ifps[1] = ifp;
4684 		}
4685 		break;
4686 	case IPFSYNC_OLDIFP :
4687 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4688 			if (ifp == nat->nat_ifps[0])
4689 				nat->nat_ifps[0] = (void *)-1;
4690 			if (ifp == nat->nat_ifps[1])
4691 				nat->nat_ifps[1] = (void *)-1;
4692 		}
4693 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4694 			if (n->in_ifps[0] == ifp)
4695 				n->in_ifps[0] = (void *)-1;
4696 			if (n->in_ifps[1] == ifp)
4697 				n->in_ifps[1] = (void *)-1;
4698 		}
4699 		break;
4700 	}
4701 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4702 	SPL_X(s);
4703 }
4704 
4705 
4706 /* ------------------------------------------------------------------------ */
4707 /* Function:    nat_icmpquerytype4                                          */
4708 /* Returns:     int - 1 == success, 0 == failure                            */
4709 /* Parameters:  icmptype(I) - ICMP type number                              */
4710 /*                                                                          */
4711 /* Tests to see if the ICMP type number passed is a query/response type or  */
4712 /* not.                                                                     */
4713 /* ------------------------------------------------------------------------ */
4714 static INLINE int nat_icmpquerytype4(icmptype)
4715 int icmptype;
4716 {
4717 
4718 	/*
4719 	 * For the ICMP query NAT code, it is essential that both the query
4720 	 * and the reply match on the NAT rule. Because the NAT structure
4721 	 * does not keep track of the icmptype, and a single NAT structure
4722 	 * is used for all icmp types with the same src, dest and id, we
4723 	 * simply define the replies as queries as well. The funny thing is,
4724 	 * altough it seems silly to call a reply a query, this is exactly
4725 	 * as it is defined in the IPv4 specification
4726 	 */
4727 
4728 	switch (icmptype)
4729 	{
4730 
4731 	case ICMP_ECHOREPLY:
4732 	case ICMP_ECHO:
4733 	/* route aedvertisement/solliciation is currently unsupported: */
4734 	/* it would require rewriting the ICMP data section            */
4735 	case ICMP_TSTAMP:
4736 	case ICMP_TSTAMPREPLY:
4737 	case ICMP_IREQ:
4738 	case ICMP_IREQREPLY:
4739 	case ICMP_MASKREQ:
4740 	case ICMP_MASKREPLY:
4741 		return 1;
4742 	default:
4743 		return 0;
4744 	}
4745 }
4746 
4747 
4748 /* ------------------------------------------------------------------------ */
4749 /* Function:    nat_log                                                     */
4750 /* Returns:     Nil                                                         */
4751 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4752 /*              type(I) - type of log entry to create                       */
4753 /*                                                                          */
4754 /* Creates a NAT log entry.                                                 */
4755 /* ------------------------------------------------------------------------ */
4756 void nat_log(nat, type, ifs)
4757 struct nat *nat;
4758 u_int type;
4759 ipf_stack_t *ifs;
4760 {
4761 #ifdef	IPFILTER_LOG
4762 # ifndef LARGE_NAT
4763 	struct ipnat *np;
4764 	int rulen;
4765 # endif
4766 	struct natlog natl;
4767 	void *items[1];
4768 	size_t sizes[1];
4769 	int types[1];
4770 
4771 	natl.nl_inip = nat->nat_inip;
4772 	natl.nl_outip = nat->nat_outip;
4773 	natl.nl_origip = nat->nat_oip;
4774 	natl.nl_bytes[0] = nat->nat_bytes[0];
4775 	natl.nl_bytes[1] = nat->nat_bytes[1];
4776 	natl.nl_pkts[0] = nat->nat_pkts[0];
4777 	natl.nl_pkts[1] = nat->nat_pkts[1];
4778 	natl.nl_origport = nat->nat_oport;
4779 	natl.nl_inport = nat->nat_inport;
4780 	natl.nl_outport = nat->nat_outport;
4781 	natl.nl_p = nat->nat_p;
4782 	natl.nl_type = type;
4783 	natl.nl_rule = -1;
4784 # ifndef LARGE_NAT
4785 	if (nat->nat_ptr != NULL) {
4786 		for (rulen = 0, np = ifs->ifs_nat_list; np;
4787 		     np = np->in_next, rulen++)
4788 			if (np == nat->nat_ptr) {
4789 				natl.nl_rule = rulen;
4790 				break;
4791 			}
4792 	}
4793 # endif
4794 	items[0] = &natl;
4795 	sizes[0] = sizeof(natl);
4796 	types[0] = 0;
4797 
4798 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
4799 #endif
4800 }
4801 
4802 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - ipf stack instance                                 */
/*                                                                          */
/* Compatibility interface for OpenBSD to trigger the correct updating of   */
/* interface references within IPFilter.                                    */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	/* Thin wrapper: the whole job is delegated to frsync(). */
	frsync(ifp, ifs);
}
#endif
4820 
4821 
4822 /* ------------------------------------------------------------------------ */
4823 /* Function:    fr_ipnatderef                                               */
4824 /* Returns:     Nil                                                         */
4825 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4826 /* Write Locks: ipf_nat                                                     */
4827 /*                                                                          */
4828 /* ------------------------------------------------------------------------ */
4829 void fr_ipnatderef(inp, ifs)
4830 ipnat_t **inp;
4831 ipf_stack_t *ifs;
4832 {
4833 	ipnat_t *in;
4834 
4835 	in = *inp;
4836 	*inp = NULL;
4837 	in->in_space++;
4838 	in->in_use--;
4839 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4840 		if (in->in_apr)
4841 			appr_free(in->in_apr);
4842 		KFREE(in);
4843 		ifs->ifs_nat_stats.ns_rules--;
4844 #ifdef notdef
4845 #if SOLARIS
4846 		if (ifs->ifs_nat_stats.ns_rules == 0)
4847 			ifs->ifs_pfil_delayed_copy = 1;
4848 #endif
4849 #endif
4850 	}
4851 }
4852 
4853 
4854 /* ------------------------------------------------------------------------ */
4855 /* Function:    fr_natderef                                                 */
4856 /* Returns:     Nil                                                         */
4857 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4858 /*                                                                          */
4859 /* Decrement the reference counter for this NAT table entry and free it if  */
4860 /* there are no more things using it.                                       */
4861 /*                                                                          */
4862 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4863 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4864 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4865 /* because nat_delete() will do that and send nat_ref to -1.                */
4866 /*                                                                          */
4867 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4868 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4869 /* ------------------------------------------------------------------------ */
4870 void fr_natderef(natp, ifs)
4871 nat_t **natp;
4872 ipf_stack_t *ifs;
4873 {
4874 	nat_t *nat;
4875 
4876 	nat = *natp;
4877 	*natp = NULL;
4878 
4879 	MUTEX_ENTER(&nat->nat_lock);
4880 	if (nat->nat_ref > 1) {
4881 		nat->nat_ref--;
4882 		MUTEX_EXIT(&nat->nat_lock);
4883 		return;
4884 	}
4885 	MUTEX_EXIT(&nat->nat_lock);
4886 
4887 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4888 	nat_delete(nat, NL_EXPIRE, ifs);
4889 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4890 }
4891 
4892 
4893 /* ------------------------------------------------------------------------ */
4894 /* Function:    fr_natclone                                                 */
4895 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4896 /*                           else pointer to new state structure            */
4897 /* Parameters:  fin(I) - pointer to packet information                      */
4898 /*              is(I)  - pointer to master state structure                  */
4899 /* Write Lock:  ipf_nat                                                     */
4900 /*                                                                          */
4901 /* Create a "duplcate" state table entry from the master.                   */
4902 /* ------------------------------------------------------------------------ */
4903 static nat_t *fr_natclone(fin, nat)
4904 fr_info_t *fin;
4905 nat_t *nat;
4906 {
4907 	frentry_t *fr;
4908 	nat_t *clone;
4909 	ipnat_t *np;
4910 	ipf_stack_t *ifs = fin->fin_ifs;
4911 
4912 	KMALLOC(clone, nat_t *);
4913 	if (clone == NULL)
4914 		return NULL;
4915 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4916 
4917 	MUTEX_NUKE(&clone->nat_lock);
4918 
4919 	clone->nat_aps = NULL;
4920 	/*
4921 	 * Initialize all these so that nat_delete() doesn't cause a crash.
4922 	 */
4923 	clone->nat_tqe.tqe_pnext = NULL;
4924 	clone->nat_tqe.tqe_next = NULL;
4925 	clone->nat_tqe.tqe_ifq = NULL;
4926 	clone->nat_tqe.tqe_parent = clone;
4927 
4928 	clone->nat_flags &= ~SI_CLONE;
4929 	clone->nat_flags |= SI_CLONED;
4930 
4931 	if (clone->nat_hm)
4932 		clone->nat_hm->hm_ref++;
4933 
4934 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
4935 		KFREE(clone);
4936 		return NULL;
4937 	}
4938 	np = clone->nat_ptr;
4939 	if (np != NULL) {
4940 		if (ifs->ifs_nat_logging)
4941 			nat_log(clone, (u_int)np->in_redir, ifs);
4942 		np->in_use++;
4943 	}
4944 	fr = clone->nat_fr;
4945 	if (fr != NULL) {
4946 		MUTEX_ENTER(&fr->fr_lock);
4947 		fr->fr_ref++;
4948 		MUTEX_EXIT(&fr->fr_lock);
4949 	}
4950 
4951 	/*
4952 	 * Because the clone is created outside the normal loop of things and
4953 	 * TCP has special needs in terms of state, initialise the timeout
4954 	 * state of the new NAT from here.
4955 	 */
4956 	if (clone->nat_p == IPPROTO_TCP) {
4957 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
4958 				  clone->nat_flags);
4959 	}
4960 #ifdef	IPFILTER_SYNC
4961 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4962 #endif
4963 	if (ifs->ifs_nat_logging)
4964 		nat_log(clone, NL_CLONE, ifs);
4965 	return clone;
4966 }
4967 
4968 
4969 /* ------------------------------------------------------------------------ */
4970 /* Function:   nat_wildok                                                   */
4971 /* Returns:    int - 1 == packet's ports match wildcards                    */
4972 /*                   0 == packet's ports don't match wildcards              */
4973 /* Parameters: nat(I)   - NAT entry                                         */
4974 /*             sport(I) - source port                                       */
4975 /*             dport(I) - destination port                                  */
4976 /*             flags(I) - wildcard flags                                    */
4977 /*             dir(I)   - packet direction                                  */
4978 /*                                                                          */
4979 /* Use NAT entry and packet direction to determine which combination of     */
4980 /* wildcard flags should be used.                                           */
4981 /* ------------------------------------------------------------------------ */
4982 static INLINE int nat_wildok(nat, sport, dport, flags, dir)
4983 nat_t *nat;
4984 int sport;
4985 int dport;
4986 int flags;
4987 int dir;
4988 {
4989 	/*
4990 	 * When called by       dir is set to
4991 	 * nat_inlookup         NAT_INBOUND (0)
4992 	 * nat_outlookup        NAT_OUTBOUND (1)
4993 	 *
4994 	 * We simply combine the packet's direction in dir with the original
4995 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4996 	 * which combination of wildcard flags to allow.
4997 	 */
4998 
4999 	switch ((dir << 1) | nat->nat_dir)
5000 	{
5001 	case 3: /* outbound packet / outbound entry */
5002 		if (((nat->nat_inport == sport) ||
5003 		    (flags & SI_W_SPORT)) &&
5004 		    ((nat->nat_oport == dport) ||
5005 		    (flags & SI_W_DPORT)))
5006 			return 1;
5007 		break;
5008 	case 2: /* outbound packet / inbound entry */
5009 		if (((nat->nat_outport == sport) ||
5010 		    (flags & SI_W_DPORT)) &&
5011 		    ((nat->nat_oport == dport) ||
5012 		    (flags & SI_W_SPORT)))
5013 			return 1;
5014 		break;
5015 	case 1: /* inbound packet / outbound entry */
5016 		if (((nat->nat_oport == sport) ||
5017 		    (flags & SI_W_DPORT)) &&
5018 		    ((nat->nat_outport == dport) ||
5019 		    (flags & SI_W_SPORT)))
5020 			return 1;
5021 		break;
5022 	case 0: /* inbound packet / inbound entry */
5023 		if (((nat->nat_oport == sport) ||
5024 		    (flags & SI_W_SPORT)) &&
5025 		    ((nat->nat_outport == dport) ||
5026 		    (flags & SI_W_DPORT)))
5027 			return 1;
5028 		break;
5029 	default:
5030 		break;
5031 	}
5032 
5033 	return(0);
5034 }
5035 
5036 
5037 /* ------------------------------------------------------------------------ */
5038 /* Function:    nat_mssclamp                                                */
5039 /* Returns:     Nil                                                         */
5040 /* Parameters:  tcp(I)    - pointer to TCP header                           */
5041 /*              maxmss(I) - value to clamp the TCP MSS to                   */
5042 /*              csump(I)  - pointer to TCP checksum                         */
5043 /*                                                                          */
5044 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
5045 /* then the TCP header checksum will be updated to reflect the change in    */
5046 /* the MSS.                                                                 */
5047 /* ------------------------------------------------------------------------ */
5048 static void nat_mssclamp(tcp, maxmss, csump)
5049 tcphdr_t *tcp;
5050 u_32_t maxmss;
5051 u_short *csump;
5052 {
5053 	u_char *cp, *ep, opt;
5054 	int hlen, advance;
5055 	u_32_t mss, sumd;
5056 
5057 	hlen = TCP_OFF(tcp) << 2;
5058 	if (hlen > sizeof(*tcp)) {
5059 		cp = (u_char *)tcp + sizeof(*tcp);
5060 		ep = (u_char *)tcp + hlen;
5061 
5062 		while (cp < ep) {
5063 			opt = cp[0];
5064 			if (opt == TCPOPT_EOL)
5065 				break;
5066 			else if (opt == TCPOPT_NOP) {
5067 				cp++;
5068 				continue;
5069 			}
5070 
5071 			if (cp + 1 >= ep)
5072 				break;
5073 			advance = cp[1];
5074 			if ((cp + advance > ep) || (advance <= 0))
5075 				break;
5076 			switch (opt)
5077 			{
5078 			case TCPOPT_MAXSEG:
5079 				if (advance != 4)
5080 					break;
5081 				mss = cp[2] * 256 + cp[3];
5082 				if (mss > maxmss) {
5083 					cp[2] = maxmss / 256;
5084 					cp[3] = maxmss & 0xff;
5085 					CALC_SUMD(mss, maxmss, sumd);
5086 					fix_outcksum(csump, sumd);
5087 				}
5088 				break;
5089 			default:
5090 				/* ignore unknown options */
5091 				break;
5092 			}
5093 
5094 			cp += advance;
5095 		}
5096 	}
5097 }
5098 
5099 
5100 /* ------------------------------------------------------------------------ */
5101 /* Function:    fr_setnatqueue                                              */
5102 /* Returns:     Nil                                                         */
5103 /* Parameters:  nat(I)- pointer to NAT structure                            */
5104 /*              rev(I) - forward(0) or reverse(1) direction                 */
5105 /* Locks:       ipf_nat (read or write)                                     */
5106 /*                                                                          */
5107 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
5108 /* determining which queue it should be placed on.                          */
5109 /* ------------------------------------------------------------------------ */
5110 void fr_setnatqueue(nat, rev, ifs)
5111 nat_t *nat;
5112 int rev;
5113 ipf_stack_t *ifs;
5114 {
5115 	ipftq_t *oifq, *nifq;
5116 
5117 	if (nat->nat_ptr != NULL)
5118 		nifq = nat->nat_ptr->in_tqehead[rev];
5119 	else
5120 		nifq = NULL;
5121 
5122 	if (nifq == NULL) {
5123 		switch (nat->nat_p)
5124 		{
5125 		case IPPROTO_UDP :
5126 			nifq = &ifs->ifs_nat_udptq;
5127 			break;
5128 		case IPPROTO_ICMP :
5129 			nifq = &ifs->ifs_nat_icmptq;
5130 			break;
5131 		case IPPROTO_TCP :
5132 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5133 			break;
5134 		default :
5135 			nifq = &ifs->ifs_nat_iptq;
5136 			break;
5137 		}
5138 	}
5139 
5140 	oifq = nat->nat_tqe.tqe_ifq;
5141 	/*
5142 	 * If it's currently on a timeout queue, move it from one queue to
5143 	 * another, else put it on the end of the newly determined queue.
5144 	 */
5145 	if (oifq != NULL)
5146 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5147 	else
5148 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5149 	return;
5150 }
5151 
5152 /* ------------------------------------------------------------------------ */
5153 /* Function:    nat_getnext                                                 */
5154 /* Returns:     int - 0 == ok, else error                                   */
5155 /* Parameters:  t(I)   - pointer to ipftoken structure                      */
5156 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5157 /*              ifs - ipf stack instance                                    */
5158 /*                                                                          */
5159 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5160 /* and copy it out to the storage space pointed to by itp.  The next item   */
5161 /* in the list to look at is put back in the ipftoken struture.             */
5162 /* ------------------------------------------------------------------------ */
5163 static int nat_getnext(t, itp, ifs)
5164 ipftoken_t *t;
5165 ipfgeniter_t *itp;
5166 ipf_stack_t *ifs;
5167 {
5168 	hostmap_t *hm, *nexthm = NULL, zerohm;
5169 	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5170 	nat_t *nat, *nextnat = NULL, zeronat;
5171 	int error = 0, count;
5172 	char *dst;
5173 
5174 	if (itp->igi_nitems == 0)
5175 		return EINVAL;
5176 
5177 	READ_ENTER(&ifs->ifs_ipf_nat);
5178 
5179 	switch (itp->igi_type)
5180 	{
5181 	case IPFGENITER_HOSTMAP :
5182 		hm = t->ipt_data;
5183 		if (hm == NULL) {
5184 			nexthm = ifs->ifs_ipf_hm_maplist;
5185 		} else {
5186 			nexthm = hm->hm_next;
5187 		}
5188 		break;
5189 
5190 	case IPFGENITER_IPNAT :
5191 		ipn = t->ipt_data;
5192 		if (ipn == NULL) {
5193 			nextipnat = ifs->ifs_nat_list;
5194 		} else {
5195 			nextipnat = ipn->in_next;
5196 		}
5197 		break;
5198 
5199 	case IPFGENITER_NAT :
5200 		nat = t->ipt_data;
5201 		if (nat == NULL) {
5202 			nextnat = ifs->ifs_nat_instances;
5203 		} else {
5204 			nextnat = nat->nat_next;
5205 		}
5206 		break;
5207 	default :
5208 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5209 		return EINVAL;
5210 	}
5211 
5212 	dst = itp->igi_data;
5213 	for (count = itp->igi_nitems; count > 0; count--) {
5214 		switch (itp->igi_type)
5215 		{
5216 		case IPFGENITER_HOSTMAP :
5217 			if (nexthm != NULL) {
5218 				ATOMIC_INC32(nexthm->hm_ref);
5219 				t->ipt_data = nexthm;
5220 			} else {
5221 				bzero(&zerohm, sizeof(zerohm));
5222 				nexthm = &zerohm;
5223 				count = 1;
5224 				t->ipt_data = NULL;
5225 			}
5226 			break;
5227 		case IPFGENITER_IPNAT :
5228 			if (nextipnat != NULL) {
5229 				ATOMIC_INC32(nextipnat->in_use);
5230 				t->ipt_data = nextipnat;
5231 			} else {
5232 				bzero(&zeroipn, sizeof(zeroipn));
5233 				nextipnat = &zeroipn;
5234 				count = 1;
5235 				t->ipt_data = NULL;
5236 			}
5237 			break;
5238 		case IPFGENITER_NAT :
5239 			if (nextnat != NULL) {
5240 				MUTEX_ENTER(&nextnat->nat_lock);
5241 				nextnat->nat_ref++;
5242 				MUTEX_EXIT(&nextnat->nat_lock);
5243 				t->ipt_data = nextnat;
5244 			} else {
5245 				bzero(&zeronat, sizeof(zeronat));
5246 				nextnat = &zeronat;
5247 				count = 1;
5248 				t->ipt_data = NULL;
5249 			}
5250 			break;
5251 		default :
5252 			break;
5253 		}
5254 
5255 		/*
5256 		 * We can safely release our hold on ipf_nat.
5257 		 */
5258 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5259 
5260 		switch (itp->igi_type)
5261 		{
5262 		case IPFGENITER_HOSTMAP :
5263 			if (hm != NULL) {
5264 				WRITE_ENTER(&ifs->ifs_ipf_nat);
5265 				fr_hostmapdel(&hm);
5266 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5267 			}
5268 			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5269 			if (error != 0) {
5270 				error = EFAULT;
5271 			} else {
5272 				dst += sizeof(*nexthm);
5273 				hm = nexthm;
5274 				nexthm = nexthm->hm_next;
5275 			}
5276 			break;
5277 		case IPFGENITER_IPNAT :
5278 			if (ipn != NULL) {
5279 				WRITE_ENTER(&ifs->ifs_ipf_nat);
5280 				fr_ipnatderef(&ipn, ifs);
5281 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5282 			}
5283 			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5284 			if (error != 0) {
5285 				error = EFAULT;
5286 			} else {
5287 				dst += sizeof(*nextipnat);
5288 				ipn = nextipnat;
5289 				nextipnat = nextipnat->in_next;
5290 			}
5291 			break;
5292 		case IPFGENITER_NAT :
5293 			if (nat != NULL) {
5294 				fr_natderef(&nat, ifs);
5295 			}
5296 			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5297 			if (error != 0) {
5298 				error = EFAULT;
5299 			} else {
5300 				dst += sizeof(*nextnat);
5301 				nat = nextnat;
5302 				nextnat = nextnat->nat_next;
5303 			}
5304 			break;
5305 		default :
5306 			break;
5307 		}
5308 
5309 		if ((count == 1) || (error != 0))
5310 			break;
5311 
5312 		READ_ENTER(&ifs->ifs_ipf_nat);
5313 	}
5314 
5315 	return error;
5316 }
5317 
5318 
5319 /* ------------------------------------------------------------------------ */
5320 /* Function:    nat_iterator                                                */
5321 /* Returns:     int - 0 == ok, else error                                   */
5322 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5323 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5324 /*                                                                          */
5325 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5326 /* generic structure to iterate through a list.  There are three different  */
5327 /* linked lists of NAT related information to go through: NAT rules, active */
5328 /* NAT mappings and the NAT fragment cache.                                 */
5329 /* ------------------------------------------------------------------------ */
5330 static int nat_iterator(token, itp, ifs)
5331 ipftoken_t *token;
5332 ipfgeniter_t *itp;
5333 ipf_stack_t *ifs;
5334 {
5335 	int error;
5336 
5337 	if (itp->igi_data == NULL)
5338 		return EFAULT;
5339 
5340 	token->ipt_subtype = itp->igi_type;
5341 
5342 	switch (itp->igi_type)
5343 	{
5344 	case IPFGENITER_HOSTMAP :
5345 	case IPFGENITER_IPNAT :
5346 	case IPFGENITER_NAT :
5347 		error = nat_getnext(token, itp, ifs);
5348 		break;
5349 	case IPFGENITER_NATFRAG :
5350 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5351 				    &ifs->ifs_ipfr_nattail,
5352 				    &ifs->ifs_ipf_natfrag, ifs);
5353 		break;
5354 	default :
5355 		error = EINVAL;
5356 		break;
5357 	}
5358 
5359 	return error;
5360 }
5361 
5362 
5363 /* -------------------------------------------------------------------- */
5364 /* Function:	nat_earlydrop						*/
5365 /* Returns:	number of dropped/removed entries from the queue	*/
5366 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5367 /*		maxidle - entry must be idle this long to be dropped	*/
5368 /*		ifs - ipf stack instance				*/
5369 /*									*/
5370 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5371 /* form specified timeout queue, based on how long they've sat idle,	*/
5372 /* without waiting for it to happen on its own.				*/
5373 /* -------------------------------------------------------------------- */
5374 static int nat_earlydrop(ifq, maxidle, ifs)
5375 ipftq_t *ifq;
5376 int maxidle;
5377 ipf_stack_t *ifs;
5378 {
5379 	ipftqent_t *tqe, *tqn;
5380 	nat_t *nat;
5381 	unsigned int dropped;
5382 	int droptick;
5383 
5384 	if (ifq == NULL)
5385 		return (0);
5386 
5387 	dropped = 0;
5388 
5389 	/*
5390 	 * Determine the tick representing the idle time we're interested
5391 	 * in.  If an entry exists in the queue, and it was touched before
5392 	 * that tick, then it's been idle longer than maxidle ... remove it.
5393 	 */
5394 	droptick = ifs->ifs_fr_ticks - maxidle;
5395 	tqn = ifq->ifq_head;
5396 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5397 		tqn = tqe->tqe_next;
5398 		nat = tqe->tqe_parent;
5399 		nat_delete(nat, ISL_EXPIRE, ifs);
5400 		dropped++;
5401 	}
5402 	return (dropped);
5403 }
5404 
5405 
5406 /* --------------------------------------------------------------------- */
5407 /* Function:	nat_flushclosing					 */
5408 /* Returns:	int - number of NAT entries deleted			 */
5409 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5410 /*		ifs - ipf stack instance				 */
5411 /*									 */
5412 /* Remove nat table entries for TCP connections which are in the process */
5413 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5414 /* --------------------------------------------------------------------- */
5415 static int nat_flushclosing(stateval, ifs)
5416 int stateval;
5417 ipf_stack_t *ifs;
5418 {
5419 	ipftq_t *ifq, *ifqn;
5420 	ipftqent_t *tqe, *tqn;
5421 	nat_t *nat;
5422 	int dropped;
5423 
5424 	dropped = 0;
5425 
5426 	/*
5427 	 * Start by deleting any entries in specific timeout queues.
5428 	 */
5429 	ifqn = &ifs->ifs_nat_tqb[stateval];
5430 	while ((ifq = ifqn) != NULL) {
5431 		ifqn = ifq->ifq_next;
5432 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5433 	}
5434 
5435 	/*
5436 	 * Next, look through user defined queues for closing entries.
5437 	 */
5438 	ifqn = ifs->ifs_nat_utqe;
5439 	while ((ifq = ifqn) != NULL) {
5440 		ifqn = ifq->ifq_next;
5441 		tqn = ifq->ifq_head;
5442 		while ((tqe = tqn) != NULL) {
5443 			tqn = tqe->tqe_next;
5444 			nat = tqe->tqe_parent;
5445 			if (nat->nat_p != IPPROTO_TCP)
5446 				continue;
5447 			if ((nat->nat_tcpstate[0] >= stateval) &&
5448 			    (nat->nat_tcpstate[1] >= stateval)) {
5449 				nat_delete(nat, NL_EXPIRE, ifs);
5450 				dropped++;
5451 			}
5452 		}
5453 	}
5454 	return (dropped);
5455 }
5456 
5457 
5458 /* --------------------------------------------------------------------- */
5459 /* Function:	nat_extraflush						 */
5460 /* Returns:	int - number of NAT entries deleted			 */
5461 /* Parameters:	which(I) - how to flush the active NAT table		 */
5462 /*		ifs - ipf stack instance				 */
5463 /* Write Locks:	ipf_nat							 */
5464 /*									 */
5465 /* Flush nat tables.  Three actions currently defined:			 */
5466 /*									 */
5467 /* which == 0 :	Flush all nat table entries.				 */
5468 /*									 */
5469 /* which == 1 :	Flush entries with TCP connections which have started	 */
5470 /*		to close on both ends.					 */
5471 /*									 */
5472 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5473 /*		does not take us below specified threshold in the table, */
5474 /*		we want to flush entries with TCP connections which have */
5475 /*		been idle for a long time.  Start with connections idle	 */
5476 /*		over 12 hours,  and then work backwards in half hour	 */
5477 /*		increments to at most 30 minutes idle, and finally work	 */
5478 /*		back in 30 second increments to at most 30 seconds.	 */
5479 /* --------------------------------------------------------------------- */
5480 static int nat_extraflush(which, ifs)
5481 int which;
5482 ipf_stack_t *ifs;
5483 {
5484 	ipftq_t *ifq, *ifqn;
5485 	nat_t *nat, **natp;
5486 	int idletime, removed, idle_idx;
5487 	SPL_INT(s);
5488 
5489 	removed = 0;
5490 
5491 	SPL_NET(s);
5492 	switch (which)
5493 	{
5494 	case 0:
5495 		natp = &ifs->ifs_nat_instances;
5496 		while ((nat = *natp) != NULL) {
5497 			natp = &nat->nat_next;
5498 			nat_delete(nat, ISL_FLUSH, ifs);
5499 			removed++;
5500 		}
5501 		break;
5502 
5503 	case 1:
5504 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5505 		break;
5506 
5507 	case 2:
5508 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5509 
5510 		/*
5511 		 * Be sure we haven't done this in the last 10 seconds.
5512 		 */
5513 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5514 		    IPF_TTLVAL(10))
5515 			break;
5516 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5517 
5518 		/*
5519 		 * Determine initial threshold for minimum idle time based on
5520 		 * how long ipfilter has been running.  Ipfilter needs to have
5521 		 * been up as long as the smallest interval to continue on.
5522 		 *
5523 		 * Minimum idle times stored in idletime_tab and indexed by
5524 		 * idle_idx.  Start at upper end of array and work backwards.
5525 		 *
5526 		 * Once the index is found, set the initial idle time to the
5527 		 * first interval before the current ipfilter run time.
5528 		 */
5529 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5530 			break;  /* switch */
5531 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5532 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5533 			idletime = idletime_tab[idle_idx];
5534 		} else {
5535 			while ((idle_idx > 0) &&
5536 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5537 				idle_idx--;
5538 			idletime = (ifs->ifs_fr_ticks /
5539 				    idletime_tab[idle_idx]) *
5540 				    idletime_tab[idle_idx];
5541 		}
5542 
5543 		while ((idle_idx >= 0) &&
5544 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5545 			/*
5546 			 * Start with appropriate timeout queue.
5547 			 */
5548 			removed += nat_earlydrop(
5549 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5550 					idletime, ifs);
5551 
5552 			/*
5553 			 * Make sure we haven't already deleted enough
5554 			 * entries before checking the user defined queues.
5555 			 */
5556 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5557 			    ifs->ifs_nat_flush_lvl_lo)
5558 				break;
5559 
5560 			/*
5561 			 * Next, look through the user defined queues.
5562 			 */
5563 			ifqn = ifs->ifs_nat_utqe;
5564 			while ((ifq = ifqn) != NULL) {
5565 				ifqn = ifq->ifq_next;
5566 				removed += nat_earlydrop(ifq, idletime, ifs);
5567 			}
5568 
5569 			/*
5570 			 * Adjust the granularity of idle time.
5571 			 *
5572 			 * If we reach an interval boundary, we need to
5573 			 * either adjust the idle time accordingly or exit
5574 			 * the loop altogether (if this is very last check).
5575 			 */
5576 			idletime -= idletime_tab[idle_idx];
5577 			if (idletime < idletime_tab[idle_idx]) {
5578 				if (idle_idx != 0) {
5579 					idletime = idletime_tab[idle_idx] -
5580 					    idletime_tab[idle_idx - 1];
5581 					idle_idx--;
5582 				} else {
5583 					break;  /* while */
5584 				}
5585 			}
5586 		}
5587 		break;
5588 	default:
5589 		break;
5590 	}
5591 
5592 	SPL_X(s);
5593 	return (removed);
5594 }
5595