xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision 90b0a856)
1 /*
2  * Copyright (C) 1995-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"$
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
24     defined(_KERNEL)
25 # include "opt_ipfilter_log.h"
26 #endif
27 #if !defined(_KERNEL)
28 # include <stdio.h>
29 # include <string.h>
30 # include <stdlib.h>
31 # define _KERNEL
32 # ifdef __OpenBSD__
33 struct file;
34 # endif
35 # include <sys/uio.h>
36 # undef _KERNEL
37 #endif
38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
39 # include <sys/filio.h>
40 # include <sys/fcntl.h>
41 #else
42 # include <sys/ioctl.h>
43 #endif
44 #if !defined(AIX)
45 # include <sys/fcntl.h>
46 #endif
47 #if !defined(linux)
48 # include <sys/protosw.h>
49 #endif
50 #include <sys/socket.h>
51 #if defined(_KERNEL)
52 # include <sys/systm.h>
53 # if !defined(__SVR4) && !defined(__svr4__)
54 #  include <sys/mbuf.h>
55 # endif
56 #endif
57 #if defined(__SVR4) || defined(__svr4__)
58 # include <sys/filio.h>
59 # include <sys/byteorder.h>
60 # ifdef _KERNEL
61 #  include <sys/dditypes.h>
62 # endif
63 # include <sys/stream.h>
64 # include <sys/kmem.h>
65 #endif
66 #if __FreeBSD_version >= 300000
67 # include <sys/queue.h>
68 #endif
69 #include <net/if.h>
70 #if __FreeBSD_version >= 300000
71 # include <net/if_var.h>
72 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
73 #  include "opt_ipfilter.h"
74 # endif
75 #endif
76 #ifdef sun
77 # include <net/af.h>
78 #endif
79 #include <net/route.h>
80 #include <netinet/in.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/ip.h>
83 
84 #ifdef RFC1825
85 # include <vpn/md5.h>
86 # include <vpn/ipsec.h>
87 extern struct ifnet vpnif;
88 #endif
89 
90 #if !defined(linux)
91 # include <netinet/ip_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip_icmp.h>
96 #include "netinet/ip_compat.h"
97 #include <netinet/tcpip.h>
98 #include "netinet/ip_fil.h"
99 #include "netinet/ip_nat.h"
100 #include "netinet/ip_frag.h"
101 #include "netinet/ip_state.h"
102 #include "netinet/ip_proxy.h"
103 #include "netinet/ipf_stack.h"
104 #ifdef	IPFILTER_SYNC
105 #include "netinet/ip_sync.h"
106 #endif
107 #if (__FreeBSD_version >= 300000)
108 # include <sys/malloc.h>
109 #endif
110 /* END OF INCLUDES */
111 
112 #undef	SOCKADDR_IN
113 #define	SOCKADDR_IN	struct sockaddr_in
114 
115 #if !defined(lint)
116 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
117 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
118 #endif
119 
120 
121 /* ======================================================================== */
122 /* How the NAT is organised and works.                                      */
123 /*                                                                          */
124 /* Inside (interface y) NAT       Outside (interface x)                     */
125 /* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
127 /* ------------>         |   ------------>                                  */
128 /* src=10.1.1.1          |   src=192.1.1.1                                  */
129 /*                       |                                                  */
130 /*                       |   in, processed by fr_checknatin() for x         */
131 /* <------------         |   <------------                                  */
132 /* dst=10.1.1.1          |   dst=192.1.1.1                                  */
133 /* -------------------- -+- -------------------------------------           */
134 /* fr_checknatout() - changes ip_src and if required, sport                 */
135 /*             - creates a new mapping, if required.                        */
136 /* fr_checknatin()  - changes ip_dst and if required, dport                 */
137 /*                                                                          */
138 /* In the NAT table, internal source is recorded as "in" and externally     */
139 /* seen as "out".                                                           */
140 /* ======================================================================== */
141 
142 
143 static	int	nat_flushtable __P((ipf_stack_t *));
144 static	int	nat_clearlist __P((ipf_stack_t *));
145 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
146 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
147 static	void	nat_delete __P((struct nat *, int, ipf_stack_t *));
148 static	void	nat_delrdr __P((struct ipnat *));
149 static	void	nat_delnat __P((struct ipnat *));
150 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
151 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
152 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
153 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
154 static	int	nat_match __P((fr_info_t *, ipnat_t *));
155 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
156 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
157 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
158 				    struct in_addr, struct in_addr, u_32_t,
159 				    ipf_stack_t *));
160 static	INLINE	int nat_icmpquerytype4 __P((int));
161 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int,
162 				    ipf_stack_t *));
163 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int,
164 				    ipf_stack_t *));
165 static	INLINE	int nat_icmperrortype4 __P((int));
166 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
167 				      tcphdr_t *, nat_t **, int));
168 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
169 static	nat_t	*fr_natclone __P((fr_info_t *, nat_t *));
170 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
171 static	INLINE	int nat_wildok __P((nat_t *, int, int, int, int));
172 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
173 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
174 static	int	nat_extraflush __P((int, ipf_stack_t *));
175 static	int	nat_earlydrop __P((ipftq_t *, int, ipf_stack_t *));
176 static	int	nat_flushclosing __P((int, ipf_stack_t *));
177 
178 
179 /*
180  * Below we declare a list of constants used only in the nat_extraflush()
181  * routine.  We are placing it here, instead of in nat_extraflush() itself,
182  * because we want to make it visible to tools such as mdb, nm etc., so the
183  * values can easily be altered during debugging.
184  */
185 static	const int	idletime_tab[] = {
186 	IPF_TTLVAL(30),		/* 30 seconds */
187 	IPF_TTLVAL(1800),	/* 30 minutes */
188 	IPF_TTLVAL(43200),	/* 12 hours */
189 	IPF_TTLVAL(345600),	/* 4 days */
190 };
191 
192 
193 /* ------------------------------------------------------------------------ */
194 /* Function:    fr_natinit                                                  */
195 /* Returns:     int - 0 == success, -1 == failure                           */
196 /* Parameters:  Nil                                                         */
197 /*                                                                          */
198 /* Initialise all of the NAT locks, tables and other structures.            */
199 /* ------------------------------------------------------------------------ */
200 int fr_natinit(ifs)
201 ipf_stack_t *ifs;
202 {
203 	int i;
204 
205 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
206 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
207 	if (ifs->ifs_nat_table[0] != NULL)
208 		bzero((char *)ifs->ifs_nat_table[0],
209 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
210 	else
211 		return -1;
212 
213 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
214 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
215 	if (ifs->ifs_nat_table[1] != NULL)
216 		bzero((char *)ifs->ifs_nat_table[1],
217 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
218 	else
219 		return -2;
220 
221 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
222 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
223 	if (ifs->ifs_nat_rules != NULL)
224 		bzero((char *)ifs->ifs_nat_rules,
225 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
226 	else
227 		return -3;
228 
229 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
230 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
231 	if (ifs->ifs_rdr_rules != NULL)
232 		bzero((char *)ifs->ifs_rdr_rules,
233 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
234 	else
235 		return -4;
236 
237 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
238 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
239 	if (ifs->ifs_maptable != NULL)
240 		bzero((char *)ifs->ifs_maptable,
241 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
242 	else
243 		return -5;
244 
245 	ifs->ifs_ipf_hm_maplist = NULL;
246 
247 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
248 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
249 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
250 		return -1;
251 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
252 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
253 
254 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
255 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
256 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
257 		return -1;
258 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
259 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
260 
261 	if (ifs->ifs_fr_nat_maxbucket == 0) {
262 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
263 			ifs->ifs_fr_nat_maxbucket++;
264 		ifs->ifs_fr_nat_maxbucket *= 2;
265 	}
266 
267 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
268 	/*
269 	 * Increase this because we may have "keep state" following this too
270 	 * and packet storms can occur if this is removed too quickly.
271 	 */
272 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
273 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
274 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
275 	ifs->ifs_nat_udptq.ifq_ref = 1;
276 	ifs->ifs_nat_udptq.ifq_head = NULL;
277 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
278 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
279 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
280 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
281 	ifs->ifs_nat_icmptq.ifq_ref = 1;
282 	ifs->ifs_nat_icmptq.ifq_head = NULL;
283 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
284 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
285 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
286 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
287 	ifs->ifs_nat_iptq.ifq_ref = 1;
288 	ifs->ifs_nat_iptq.ifq_head = NULL;
289 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
290 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
291 	ifs->ifs_nat_iptq.ifq_next = NULL;
292 
293 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
294 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
295 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
296 #ifdef LARGE_NAT
297 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
298 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
299 #endif
300 	}
301 
302 	/*
303 	 * Increase this because we may have "keep state" following
304 	 * this too and packet storms can occur if this is removed
305 	 * too quickly.
306 	 */
307 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
308 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
309 
310 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
311 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
312 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
313 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
314 
315 	ifs->ifs_fr_nat_init = 1;
316 
317 	return 0;
318 }
319 
320 
321 /* ------------------------------------------------------------------------ */
322 /* Function:    nat_addrdr                                                  */
323 /* Returns:     Nil                                                         */
324 /* Parameters:  n(I) - pointer to NAT rule to add                           */
325 /*                                                                          */
326 /* Adds a redirect rule to the hash table of redirect rules and the list of */
327 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
328 /* use by redirect rules.                                                   */
329 /* ------------------------------------------------------------------------ */
330 static void nat_addrdr(n, ifs)
331 ipnat_t *n;
332 ipf_stack_t *ifs;
333 {
334 	ipnat_t **np;
335 	u_32_t j;
336 	u_int hv;
337 	int k;
338 
339 	k = count4bits(n->in_outmsk);
340 	if ((k >= 0) && (k != 32))
341 		ifs->ifs_rdr_masks |= 1 << k;
342 	j = (n->in_outip & n->in_outmsk);
343 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
344 	np = ifs->ifs_rdr_rules + hv;
345 	while (*np != NULL)
346 		np = &(*np)->in_rnext;
347 	n->in_rnext = NULL;
348 	n->in_prnext = np;
349 	n->in_hv = hv;
350 	*np = n;
351 }
352 
353 
354 /* ------------------------------------------------------------------------ */
355 /* Function:    nat_addnat                                                  */
356 /* Returns:     Nil                                                         */
357 /* Parameters:  n(I) - pointer to NAT rule to add                           */
358 /*                                                                          */
359 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
360 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
361 /* redirect rules.                                                          */
362 /* ------------------------------------------------------------------------ */
363 static void nat_addnat(n, ifs)
364 ipnat_t *n;
365 ipf_stack_t *ifs;
366 {
367 	ipnat_t **np;
368 	u_32_t j;
369 	u_int hv;
370 	int k;
371 
372 	k = count4bits(n->in_inmsk);
373 	if ((k >= 0) && (k != 32))
374 		ifs->ifs_nat_masks |= 1 << k;
375 	j = (n->in_inip & n->in_inmsk);
376 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
377 	np = ifs->ifs_nat_rules + hv;
378 	while (*np != NULL)
379 		np = &(*np)->in_mnext;
380 	n->in_mnext = NULL;
381 	n->in_pmnext = np;
382 	n->in_hv = hv;
383 	*np = n;
384 }
385 
386 
387 /* ------------------------------------------------------------------------ */
388 /* Function:    nat_delrdr                                                  */
389 /* Returns:     Nil                                                         */
390 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
391 /*                                                                          */
392 /* Removes a redirect rule from the hash table of redirect rules.           */
393 /* ------------------------------------------------------------------------ */
394 static void nat_delrdr(n)
395 ipnat_t *n;
396 {
397 	if (n->in_rnext)
398 		n->in_rnext->in_prnext = n->in_prnext;
399 	*n->in_prnext = n->in_rnext;
400 }
401 
402 
403 /* ------------------------------------------------------------------------ */
404 /* Function:    nat_delnat                                                  */
405 /* Returns:     Nil                                                         */
406 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
407 /*                                                                          */
408 /* Removes a NAT map rule from the hash table of NAT map rules.             */
409 /* ------------------------------------------------------------------------ */
410 static void nat_delnat(n)
411 ipnat_t *n;
412 {
413 	if (n->in_mnext != NULL)
414 		n->in_mnext->in_pmnext = n->in_pmnext;
415 	*n->in_pmnext = n->in_mnext;
416 }
417 
418 
419 /* ------------------------------------------------------------------------ */
420 /* Function:    nat_hostmap                                                 */
421 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
422 /*                                else a pointer to the hostmapping to use  */
423 /* Parameters:  np(I)   - pointer to NAT rule                               */
424 /*              real(I) - real IP address                                   */
425 /*              map(I)  - mapped IP address                                 */
426 /*              port(I) - destination port number                           */
427 /* Write Locks: ipf_nat                                                     */
428 /*                                                                          */
429 /* Check if an ip address has already been allocated for a given mapping    */
430 /* that is not doing port based translation.  If is not yet allocated, then */
431 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
432 /* ------------------------------------------------------------------------ */
433 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
434 ipnat_t *np;
435 struct in_addr src;
436 struct in_addr dst;
437 struct in_addr map;
438 u_32_t port;
439 ipf_stack_t *ifs;
440 {
441 	hostmap_t *hm;
442 	u_int hv;
443 
444 	hv = (src.s_addr ^ dst.s_addr);
445 	hv += src.s_addr;
446 	hv += dst.s_addr;
447 	hv %= HOSTMAP_SIZE;
448 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
449 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
450 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
451 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
452 		    ((port == 0) || (port == hm->hm_port))) {
453 			hm->hm_ref++;
454 			return hm;
455 		}
456 
457 	if (np == NULL)
458 		return NULL;
459 
460 	KMALLOC(hm, hostmap_t *);
461 	if (hm) {
462 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
463 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
464 		if (ifs->ifs_ipf_hm_maplist != NULL)
465 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
466 		ifs->ifs_ipf_hm_maplist = hm;
467 
468 		hm->hm_next = ifs->ifs_maptable[hv];
469 		hm->hm_pnext = ifs->ifs_maptable + hv;
470 		if (ifs->ifs_maptable[hv] != NULL)
471 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
472 		ifs->ifs_maptable[hv] = hm;
473 		hm->hm_ipnat = np;
474 		hm->hm_srcip = src;
475 		hm->hm_dstip = dst;
476 		hm->hm_mapip = map;
477 		hm->hm_ref = 1;
478 		hm->hm_port = port;
479 	}
480 	return hm;
481 }
482 
483 
484 /* ------------------------------------------------------------------------ */
485 /* Function:    fr_hostmapdel                                              */
486 /* Returns:     Nil                                                         */
487 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
488 /* Write Locks: ipf_nat                                                     */
489 /*                                                                          */
490 /* Decrement the references to this hostmap structure by one.  If this      */
491 /* reaches zero then remove it and free it.                                 */
492 /* ------------------------------------------------------------------------ */
493 void fr_hostmapdel(hmp)
494 struct hostmap **hmp;
495 {
496 	struct hostmap *hm;
497 
498 	hm = *hmp;
499 	*hmp = NULL;
500 
501 	hm->hm_ref--;
502 	if (hm->hm_ref == 0) {
503 		if (hm->hm_next)
504 			hm->hm_next->hm_pnext = hm->hm_pnext;
505 		*hm->hm_pnext = hm->hm_next;
506 		if (hm->hm_hnext)
507 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
508 		*hm->hm_phnext = hm->hm_hnext;
509 		KFREE(hm);
510 	}
511 }
512 
513 
514 /* ------------------------------------------------------------------------ */
515 /* Function:    fix_outcksum                                                */
516 /* Returns:     Nil                                                         */
517 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
518 /*              n((I)  - amount to adjust checksum by                       */
519 /*                                                                          */
520 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
521 /* ------------------------------------------------------------------------ */
522 void fix_outcksum(sp, n)
523 u_short *sp;
524 u_32_t n;
525 {
526 	u_short sumshort;
527 	u_32_t sum1;
528 
529 	if (n == 0)
530 		return;
531 
532 	sum1 = (~ntohs(*sp)) & 0xffff;
533 	sum1 += (n);
534 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
535 	/* Again */
536 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
537 	sumshort = ~(u_short)sum1;
538 	*(sp) = htons(sumshort);
539 }
540 
541 
542 /* ------------------------------------------------------------------------ */
543 /* Function:    fix_incksum                                                 */
544 /* Returns:     Nil                                                         */
545 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
546 /*              n((I)  - amount to adjust checksum by                       */
547 /*                                                                          */
548 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
549 /* ------------------------------------------------------------------------ */
550 void fix_incksum(sp, n)
551 u_short *sp;
552 u_32_t n;
553 {
554 	u_short sumshort;
555 	u_32_t sum1;
556 
557 	if (n == 0)
558 		return;
559 
560 	sum1 = (~ntohs(*sp)) & 0xffff;
561 	sum1 += ~(n) & 0xffff;
562 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
563 	/* Again */
564 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
565 	sumshort = ~(u_short)sum1;
566 	*(sp) = htons(sumshort);
567 }
568 
569 
570 /* ------------------------------------------------------------------------ */
571 /* Function:    fix_datacksum                                               */
572 /* Returns:     Nil                                                         */
573 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
574 /*              n((I)  - amount to adjust checksum by                       */
575 /*                                                                          */
576 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
577 /* data section of an IP packet.                                            */
578 /*                                                                          */
579 /* The only situation in which you need to do this is when NAT'ing an       */
580 /* ICMP error message. Such a message, contains in its body the IP header   */
581 /* of the original IP packet, that causes the error.                        */
582 /*                                                                          */
583 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
584 /* kernel the data section of the ICMP error is just data, and no special   */
585 /* processing like hardware cksum or ntohs processing have been done by the */
586 /* kernel on the data section.                                              */
587 /* ------------------------------------------------------------------------ */
588 void fix_datacksum(sp, n)
589 u_short *sp;
590 u_32_t n;
591 {
592 	u_short sumshort;
593 	u_32_t sum1;
594 
595 	if (n == 0)
596 		return;
597 
598 	sum1 = (~ntohs(*sp)) & 0xffff;
599 	sum1 += (n);
600 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
601 	/* Again */
602 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
603 	sumshort = ~(u_short)sum1;
604 	*(sp) = htons(sumshort);
605 }
606 
607 
608 /* ------------------------------------------------------------------------ */
609 /* Function:    fr_nat_ioctl                                                */
610 /* Returns:     int - 0 == success, != 0 == failure                         */
611 /* Parameters:  data(I) - pointer to ioctl data                             */
612 /*              cmd(I)  - ioctl command integer                             */
613 /*              mode(I) - file mode bits used with open                     */
614 /*                                                                          */
615 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
616 /* ------------------------------------------------------------------------ */
617 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
618 ioctlcmd_t cmd;
619 caddr_t data;
620 int mode, uid;
621 void *ctx;
622 ipf_stack_t *ifs;
623 {
624 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
625 	int error = 0, ret, arg, getlock;
626 	ipnat_t natd;
627 
628 #if (BSD >= 199306) && defined(_KERNEL)
629 	if ((securelevel >= 2) && (mode & FWRITE))
630 		return EPERM;
631 #endif
632 
633 #if defined(__osf__) && defined(_KERNEL)
634 	getlock = 0;
635 #else
636 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
637 #endif
638 
639 	nat = NULL;     /* XXX gcc -Wuninitialized */
640 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
641 		KMALLOC(nt, ipnat_t *);
642 	} else {
643 		nt = NULL;
644 	}
645 
646 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
647 		if (mode & NAT_SYSSPACE) {
648 			bcopy(data, (char *)&natd, sizeof(natd));
649 			error = 0;
650 		} else {
651 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
652 		}
653 
654 	} else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */
655 		BCOPYIN(data, &arg, sizeof(arg));
656 	}
657 
658 	if (error != 0)
659 		goto done;
660 
661 	/*
662 	 * For add/delete, look to see if the NAT entry is already present
663 	 */
664 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
665 		nat = &natd;
666 		if (nat->in_v == 0)	/* For backward compat. */
667 			nat->in_v = 4;
668 		nat->in_flags &= IPN_USERFLAGS;
669 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
670 			if ((nat->in_flags & IPN_SPLIT) == 0)
671 				nat->in_inip &= nat->in_inmsk;
672 			if ((nat->in_flags & IPN_IPRANGE) == 0)
673 				nat->in_outip &= nat->in_outmsk;
674 		}
675 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
676 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
677 		     np = &n->in_next)
678 			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
679 					IPN_CMPSIZ))
680 				break;
681 	}
682 
683 	switch (cmd)
684 	{
685 	case SIOCGENITER :
686 	    {
687 		ipfgeniter_t iter;
688 		ipftoken_t *token;
689 
690 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
691 		if (error != 0)
692 			break;
693 
694 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
695 		if (token != NULL)
696 			error  = nat_iterator(token, &iter, ifs);
697 		else
698 			error = ESRCH;
699 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
700 		break;
701 	    }
702 #ifdef  IPFILTER_LOG
703 	case SIOCIPFFB :
704 	{
705 		int tmp;
706 
707 		if (!(mode & FWRITE))
708 			error = EPERM;
709 		else {
710 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
711 			BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp));
712 		}
713 		break;
714 	}
715 	case SIOCSETLG :
716 		if (!(mode & FWRITE))
717 			error = EPERM;
718 		else {
719 			BCOPYIN((char *)data,
720 				       (char *)&ifs->ifs_nat_logging,
721 				sizeof(ifs->ifs_nat_logging));
722 		}
723 		break;
724 	case SIOCGETLG :
725 		BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
726 			sizeof(ifs->ifs_nat_logging));
727 		break;
728 	case FIONREAD :
729 		arg = ifs->ifs_iplused[IPL_LOGNAT];
730 		BCOPYOUT(&arg, data, sizeof(arg));
731 		break;
732 #endif
733 	case SIOCADNAT :
734 		if (!(mode & FWRITE)) {
735 			error = EPERM;
736 		} else if (n != NULL) {
737 			error = EEXIST;
738 		} else if (nt == NULL) {
739 			error = ENOMEM;
740 		}
741 		if (error != 0) {
742 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
743 			break;
744 		}
745 		bcopy((char *)nat, (char *)nt, sizeof(*n));
746 		error = nat_siocaddnat(nt, np, getlock, ifs);
747 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
748 		if (error == 0)
749 			nt = NULL;
750 		break;
751 	case SIOCRMNAT :
752 		if (!(mode & FWRITE)) {
753 			error = EPERM;
754 			n = NULL;
755 		} else if (n == NULL) {
756 			error = ESRCH;
757 		}
758 
759 		if (error != 0) {
760 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
761 			break;
762 		}
763 		nat_siocdelnat(n, np, getlock, ifs);
764 
765 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
766 		n = NULL;
767 		break;
768 	case SIOCGNATS :
769 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
770 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
771 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
772 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
773 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
774 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
775 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
776 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
777 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
778 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
779 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
780 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
781 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
782 		break;
783 	case SIOCGNATL :
784 	    {
785 		natlookup_t nl;
786 
787 		if (getlock) {
788 			READ_ENTER(&ifs->ifs_ipf_nat);
789 		}
790 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
791 		if (error == 0) {
792 			if (nat_lookupredir(&nl, ifs) != NULL) {
793 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
794 			} else {
795 				error = ESRCH;
796 			}
797 		}
798 		if (getlock) {
799 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
800 		}
801 		break;
802 	    }
803 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
804 		if (!(mode & FWRITE)) {
805 			error = EPERM;
806 			break;
807 		}
808 		if (getlock) {
809 			WRITE_ENTER(&ifs->ifs_ipf_nat);
810 		}
811 		error = 0;
812 		if (arg == 0)
813 			ret = nat_flushtable(ifs);
814 		else if (arg == 1)
815 			ret = nat_clearlist(ifs);
816 		else if (arg >= 2 && arg <= 4)
817 			ret = nat_extraflush(arg - 2, ifs);
818 		else
819 			error = EINVAL;
820 		if (getlock) {
821 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
822 		}
823 		if (error == 0) {
824 			BCOPYOUT(&ret, data, sizeof(ret));
825 		}
826 		break;
827 	case SIOCPROXY :
828 		error = appr_ioctl(data, cmd, mode, ifs);
829 		break;
830 	case SIOCSTLCK :
831 		if (!(mode & FWRITE)) {
832 			error = EPERM;
833 		} else {
834 			fr_lock(data, &ifs->ifs_fr_nat_lock);
835 		}
836 		break;
837 	case SIOCSTPUT :
838 		if ((mode & FWRITE) != 0) {
839 			error = fr_natputent(data, getlock, ifs);
840 		} else {
841 			error = EACCES;
842 		}
843 		break;
844 	case SIOCSTGSZ :
845 		if (ifs->ifs_fr_nat_lock) {
846 			if (getlock) {
847 				READ_ENTER(&ifs->ifs_ipf_nat);
848 			}
849 			error = fr_natgetsz(data, ifs);
850 			if (getlock) {
851 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
852 			}
853 		} else
854 			error = EACCES;
855 		break;
856 	case SIOCSTGET :
857 		if (ifs->ifs_fr_nat_lock) {
858 			if (getlock) {
859 				READ_ENTER(&ifs->ifs_ipf_nat);
860 			}
861 			error = fr_natgetent(data, ifs);
862 			if (getlock) {
863 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
864 			}
865 		} else
866 			error = EACCES;
867 		break;
868 	case SIOCIPFDELTOK :
869 		(void) BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
870 		error = ipf_deltoken(arg, uid, ctx, ifs);
871 		break;
872 	default :
873 		error = EINVAL;
874 		break;
875 	}
876 done:
877 	if (nt)
878 		KFREE(nt);
879 	return error;
880 }
881 
882 
883 /* ------------------------------------------------------------------------ */
884 /* Function:    nat_siocaddnat                                              */
885 /* Returns:     int - 0 == success, != 0 == failure                         */
886 /* Parameters:  n(I)       - pointer to new NAT rule                        */
887 /*              np(I)      - pointer to where to insert new NAT rule        */
888 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
889 /* Mutex Locks: ipf_natio                                                   */
890 /*                                                                          */
891 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
892 /* from information passed to the kernel, then add it  to the appropriate   */
893 /* NAT rule table(s).                                                       */
894 /* ------------------------------------------------------------------------ */
895 static int nat_siocaddnat(n, np, getlock, ifs)
896 ipnat_t *n, **np;
897 int getlock;
898 ipf_stack_t *ifs;
899 {
900 	int error = 0, i, j;
901 
902 	if (nat_resolverule(n, ifs) != 0)
903 		return ENOENT;
904 
905 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
906 		return EINVAL;
907 
908 	n->in_use = 0;
909 	if (n->in_redir & NAT_MAPBLK)
910 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
911 	else if (n->in_flags & IPN_AUTOPORTMAP)
912 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
913 	else if (n->in_flags & IPN_IPRANGE)
914 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
915 	else if (n->in_flags & IPN_SPLIT)
916 		n->in_space = 2;
917 	else if (n->in_outmsk != 0)
918 		n->in_space = ~ntohl(n->in_outmsk);
919 	else
920 		n->in_space = 1;
921 
922 	/*
923 	 * Calculate the number of valid IP addresses in the output
924 	 * mapping range.  In all cases, the range is inclusive of
925 	 * the start and ending IP addresses.
926 	 * If to a CIDR address, lose 2: broadcast + network address
927 	 *                               (so subtract 1)
928 	 * If to a range, add one.
929 	 * If to a single IP address, set to 1.
930 	 */
931 	if (n->in_space) {
932 		if ((n->in_flags & IPN_IPRANGE) != 0)
933 			n->in_space += 1;
934 		else
935 			n->in_space -= 1;
936 	} else
937 		n->in_space = 1;
938 
939 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
940 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
941 		n->in_nip = ntohl(n->in_outip) + 1;
942 	else if ((n->in_flags & IPN_SPLIT) &&
943 		 (n->in_redir & NAT_REDIRECT))
944 		n->in_nip = ntohl(n->in_inip);
945 	else
946 		n->in_nip = ntohl(n->in_outip);
947 	if (n->in_redir & NAT_MAP) {
948 		n->in_pnext = ntohs(n->in_pmin);
949 		/*
950 		 * Multiply by the number of ports made available.
951 		 */
952 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
953 			n->in_space *= (ntohs(n->in_pmax) -
954 					ntohs(n->in_pmin) + 1);
955 			/*
956 			 * Because two different sources can map to
957 			 * different destinations but use the same
958 			 * local IP#/port #.
959 			 * If the result is smaller than in_space, then
960 			 * we may have wrapped around 32bits.
961 			 */
962 			i = n->in_inmsk;
963 			if ((i != 0) && (i != 0xffffffff)) {
964 				j = n->in_space * (~ntohl(i) + 1);
965 				if (j >= n->in_space)
966 					n->in_space = j;
967 				else
968 					n->in_space = 0xffffffff;
969 			}
970 		}
971 		/*
972 		 * If no protocol is specified, multiple by 256 to allow for
973 		 * at least one IP:IP mapping per protocol.
974 		 */
975 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
976 				j = n->in_space * 256;
977 				if (j >= n->in_space)
978 					n->in_space = j;
979 				else
980 					n->in_space = 0xffffffff;
981 		}
982 	}
983 
984 	/* Otherwise, these fields are preset */
985 
986 	if (getlock) {
987 		WRITE_ENTER(&ifs->ifs_ipf_nat);
988 	}
989 	n->in_next = NULL;
990 	*np = n;
991 
992 	if (n->in_age[0] != 0)
993 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
994 						  n->in_age[0], ifs);
995 
996 	if (n->in_age[1] != 0)
997 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
998 						  n->in_age[1], ifs);
999 
1000 	if (n->in_redir & NAT_REDIRECT) {
1001 		n->in_flags &= ~IPN_NOTDST;
1002 		nat_addrdr(n, ifs);
1003 	}
1004 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1005 		n->in_flags &= ~IPN_NOTSRC;
1006 		nat_addnat(n, ifs);
1007 	}
1008 	n = NULL;
1009 	ifs->ifs_nat_stats.ns_rules++;
1010 	if (getlock) {
1011 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1012 	}
1013 
1014 	return error;
1015 }
1016 
1017 
1018 /* ------------------------------------------------------------------------ */
1019 /* Function:    nat_resolvrule                                              */
1020 /* Returns:     int - 0 == success, -1 == failure                           */
1021 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1022 /*                                                                          */
1023 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1024 /* any specified interfaces and proxy labels, and determines whether or not */
1025 /* all proxy labels are correctly specified.				    */
1026 /*									    */
1027 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1028 /* ------------------------------------------------------------------------ */
1029 static int nat_resolverule(n, ifs)
1030 ipnat_t *n;
1031 ipf_stack_t *ifs;
1032 {
1033 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1034 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4, ifs);
1035 
1036 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1037 	if (n->in_ifnames[1][0] == '\0') {
1038 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1039 		n->in_ifps[1] = n->in_ifps[0];
1040 	} else {
1041 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4, ifs);
1042 	}
1043 
1044 	if (n->in_plabel[0] != '\0') {
1045 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1046 		if (n->in_apr == NULL)
1047 			return -1;
1048 	}
1049 	return 0;
1050 }
1051 
1052 
1053 /* ------------------------------------------------------------------------ */
1054 /* Function:    nat_siocdelnat                                              */
1055 /* Returns:     int - 0 == success, != 0 == failure                         */
1056 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1057 /*              np(I)      - pointer to where to insert new NAT rule        */
1058 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1059 /* Mutex Locks: ipf_natio                                                   */
1060 /*                                                                          */
1061 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1062 /* from information passed to the kernel, then add it  to the appropriate   */
1063 /* NAT rule table(s).                                                       */
1064 /* ------------------------------------------------------------------------ */
1065 static void nat_siocdelnat(n, np, getlock, ifs)
1066 ipnat_t *n, **np;
1067 int getlock;
1068 ipf_stack_t *ifs;
1069 {
1070 	if (getlock) {
1071 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1072 	}
1073 	if (n->in_redir & NAT_REDIRECT)
1074 		nat_delrdr(n);
1075 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1076 		nat_delnat(n);
1077 	if (ifs->ifs_nat_list == NULL) {
1078 		ifs->ifs_nat_masks = 0;
1079 		ifs->ifs_rdr_masks = 0;
1080 	}
1081 
1082 	if (n->in_tqehead[0] != NULL) {
1083 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1084 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1085 		}
1086 	}
1087 
1088 	if (n->in_tqehead[1] != NULL) {
1089 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1090 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1091 		}
1092 	}
1093 
1094 	*np = n->in_next;
1095 
1096 	if (n->in_use == 0) {
1097 		if (n->in_apr)
1098 			appr_free(n->in_apr);
1099 		KFREE(n);
1100 		ifs->ifs_nat_stats.ns_rules--;
1101 	} else {
1102 		n->in_flags |= IPN_DELETE;
1103 		n->in_next = NULL;
1104 	}
1105 	if (getlock) {
1106 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1107 	}
1108 }
1109 
1110 
1111 /* ------------------------------------------------------------------------ */
1112 /* Function:    fr_natgetsz                                                 */
1113 /* Returns:     int - 0 == success, != 0 is the error value.                */
1114 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1115 /*                        get the size of.                                  */
1116 /*                                                                          */
1117 /* Handle SIOCSTGSZ.                                                        */
1118 /* Return the size of the nat list entry to be copied back to user space.   */
1119 /* The size of the entry is stored in the ng_sz field and the enture natget */
1120 /* structure is copied back to the user.                                    */
1121 /* ------------------------------------------------------------------------ */
1122 static int fr_natgetsz(data, ifs)
1123 caddr_t data;
1124 ipf_stack_t *ifs;
1125 {
1126 	ap_session_t *aps;
1127 	nat_t *nat, *n;
1128 	natget_t ng;
1129 
1130 	BCOPYIN(data, &ng, sizeof(ng));
1131 
1132 	nat = ng.ng_ptr;
1133 	if (!nat) {
1134 		nat = ifs->ifs_nat_instances;
1135 		ng.ng_sz = 0;
1136 		/*
1137 		 * Empty list so the size returned is 0.  Simple.
1138 		 */
1139 		if (nat == NULL) {
1140 			BCOPYOUT(&ng, data, sizeof(ng));
1141 			return 0;
1142 		}
1143 	} else {
1144 		/*
1145 		 * Make sure the pointer we're copying from exists in the
1146 		 * current list of entries.  Security precaution to prevent
1147 		 * copying of random kernel data.
1148 		 */
1149 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1150 			if (n == nat)
1151 				break;
1152 		if (!n)
1153 			return ESRCH;
1154 	}
1155 
1156 	/*
1157 	 * Incluse any space required for proxy data structures.
1158 	 */
1159 	ng.ng_sz = sizeof(nat_save_t);
1160 	aps = nat->nat_aps;
1161 	if (aps != NULL) {
1162 		ng.ng_sz += sizeof(ap_session_t) - 4;
1163 		if (aps->aps_data != 0)
1164 			ng.ng_sz += aps->aps_psiz;
1165 	}
1166 
1167 	BCOPYOUT(&ng, data, sizeof(ng));
1168 	return 0;
1169 }
1170 
1171 
1172 /* ------------------------------------------------------------------------ */
1173 /* Function:    fr_natgetent                                                */
1174 /* Returns:     int - 0 == success, != 0 is the error value.                */
1175 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1176 /*                        to NAT structure to copy out.                     */
1177 /*                                                                          */
1178 /* Handle SIOCSTGET.                                                        */
1179 /* Copies out NAT entry to user space.  Any additional data held for a      */
1180 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1181 /* ------------------------------------------------------------------------ */
1182 static int fr_natgetent(data, ifs)
1183 caddr_t data;
1184 ipf_stack_t *ifs;
1185 {
1186 	int error, outsize;
1187 	ap_session_t *aps;
1188 	nat_save_t *ipn, ipns;
1189 	nat_t *n, *nat;
1190 
1191 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1192 	if (error != 0)
1193 		return error;
1194 
1195 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1196 		return EINVAL;
1197 
1198 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1199 	if (ipn == NULL)
1200 		return ENOMEM;
1201 
1202 	ipn->ipn_dsize = ipns.ipn_dsize;
1203 	nat = ipns.ipn_next;
1204 	if (nat == NULL) {
1205 		nat = ifs->ifs_nat_instances;
1206 		if (nat == NULL) {
1207 			if (ifs->ifs_nat_instances == NULL)
1208 				error = ENOENT;
1209 			goto finished;
1210 		}
1211 	} else {
1212 		/*
1213 		 * Make sure the pointer we're copying from exists in the
1214 		 * current list of entries.  Security precaution to prevent
1215 		 * copying of random kernel data.
1216 		 */
1217 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1218 			if (n == nat)
1219 				break;
1220 		if (n == NULL) {
1221 			error = ESRCH;
1222 			goto finished;
1223 		}
1224 	}
1225 	ipn->ipn_next = nat->nat_next;
1226 
1227 	/*
1228 	 * Copy the NAT structure.
1229 	 */
1230 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1231 
1232 	/*
1233 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1234 	 */
1235 	if (nat->nat_ptr != NULL)
1236 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1237 		      sizeof(ipn->ipn_ipnat));
1238 
1239 	/*
1240 	 * If we also know the NAT entry has an associated filter rule,
1241 	 * save that too.
1242 	 */
1243 	if (nat->nat_fr != NULL)
1244 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1245 		      sizeof(ipn->ipn_fr));
1246 
1247 	/*
1248 	 * Last but not least, if there is an application proxy session set
1249 	 * up for this NAT entry, then copy that out too, including any
1250 	 * private data saved along side it by the proxy.
1251 	 */
1252 	aps = nat->nat_aps;
1253 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1254 	if (aps != NULL) {
1255 		char *s;
1256 
1257 		if (outsize < sizeof(*aps)) {
1258 			error = ENOBUFS;
1259 			goto finished;
1260 		}
1261 
1262 		s = ipn->ipn_data;
1263 		bcopy((char *)aps, s, sizeof(*aps));
1264 		s += sizeof(*aps);
1265 		outsize -= sizeof(*aps);
1266 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1267 			bcopy(aps->aps_data, s, aps->aps_psiz);
1268 		else
1269 			error = ENOBUFS;
1270 	}
1271 	if (error == 0) {
1272 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1273 	}
1274 
1275 finished:
1276 	if (ipn != NULL) {
1277 		KFREES(ipn, ipns.ipn_dsize);
1278 	}
1279 	return error;
1280 }
1281 
1282 
1283 /* ------------------------------------------------------------------------ */
1284 /* Function:    fr_natputent                                                */
1285 /* Returns:     int - 0 == success, != 0 is the error value.                */
1286 /* Parameters:  data(I) -     pointer to natget structure with NAT          */
1287 /*                            structure information to load into the kernel */
1288 /*              getlock(I) - flag indicating whether or not a write lock    */
1289 /*                           on ipf_nat is already held.                    */
1290 /*                                                                          */
1291 /* Handle SIOCSTPUT.                                                        */
1292 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1293 /* firewall rule data structures, if pointers to them indicate so.          */
1294 /* ------------------------------------------------------------------------ */
1295 static int fr_natputent(data, getlock, ifs)
1296 caddr_t data;
1297 int getlock;
1298 ipf_stack_t *ifs;
1299 {
1300 	nat_save_t ipn, *ipnn;
1301 	ap_session_t *aps;
1302 	nat_t *n, *nat;
1303 	frentry_t *fr;
1304 	fr_info_t fin;
1305 	ipnat_t *in;
1306 	int error;
1307 
1308 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1309 	if (error != 0)
1310 		return error;
1311 
1312 	/*
1313 	 * Trigger automatic call to nat_extraflush() if the
1314 	 * table has reached capcity specified by hi watermark.
1315 	 */
1316 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
1317 		ifs->ifs_nat_doflush = 1;
1318 
1319 	/*
1320 	 * Initialise early because of code at junkput label.
1321 	 */
1322 	in = NULL;
1323 	aps = NULL;
1324 	nat = NULL;
1325 	ipnn = NULL;
1326 
1327 	/*
1328 	 * New entry, copy in the rest of the NAT entry if it's size is more
1329 	 * than just the nat_t structure.
1330 	 */
1331 	fr = NULL;
1332 	if (ipn.ipn_dsize > sizeof(ipn)) {
1333 		if (ipn.ipn_dsize > 81920) {
1334 			error = ENOMEM;
1335 			goto junkput;
1336 		}
1337 
1338 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1339 		if (ipnn == NULL)
1340 			return ENOMEM;
1341 
1342 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1343 		if (error != 0) {
1344 			error = EFAULT;
1345 			goto junkput;
1346 		}
1347 	} else
1348 		ipnn = &ipn;
1349 
1350 	KMALLOC(nat, nat_t *);
1351 	if (nat == NULL) {
1352 		error = ENOMEM;
1353 		goto junkput;
1354 	}
1355 
1356 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1357 	/*
1358 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1359 	 */
1360 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1361 	nat->nat_tqe.tqe_pnext = NULL;
1362 	nat->nat_tqe.tqe_next = NULL;
1363 	nat->nat_tqe.tqe_ifq = NULL;
1364 	nat->nat_tqe.tqe_parent = nat;
1365 
1366 	/*
1367 	 * Restore the rule associated with this nat session
1368 	 */
1369 	in = ipnn->ipn_nat.nat_ptr;
1370 	if (in != NULL) {
1371 		KMALLOC(in, ipnat_t *);
1372 		nat->nat_ptr = in;
1373 		if (in == NULL) {
1374 			error = ENOMEM;
1375 			goto junkput;
1376 		}
1377 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1378 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1379 		in->in_use = 1;
1380 		in->in_flags |= IPN_DELETE;
1381 
1382 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1383 
1384 		if (nat_resolverule(in, ifs) != 0) {
1385 			error = ESRCH;
1386 			goto junkput;
1387 		}
1388 	}
1389 
1390 	/*
1391 	 * Check that the NAT entry doesn't already exist in the kernel.
1392 	 */
1393 	bzero((char *)&fin, sizeof(fin));
1394 	fin.fin_p = nat->nat_p;
1395 	fin.fin_ifs = ifs;
1396 	if (nat->nat_dir == NAT_OUTBOUND) {
1397 		fin.fin_data[0] = ntohs(nat->nat_oport);
1398 		fin.fin_data[1] = ntohs(nat->nat_outport);
1399 		fin.fin_ifp = nat->nat_ifps[0];
1400 		if (getlock) {
1401 			READ_ENTER(&ifs->ifs_ipf_nat);
1402 		}
1403 		n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1404 			nat->nat_oip, nat->nat_outip);
1405 		if (getlock) {
1406 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1407 		}
1408 		if (n != NULL) {
1409 			error = EEXIST;
1410 			goto junkput;
1411 		}
1412 	} else if (nat->nat_dir == NAT_INBOUND) {
1413 		fin.fin_data[0] = ntohs(nat->nat_inport);
1414 		fin.fin_data[1] = ntohs(nat->nat_oport);
1415 		fin.fin_ifp = nat->nat_ifps[1];
1416 		if (getlock) {
1417 			READ_ENTER(&ifs->ifs_ipf_nat);
1418 		}
1419 		n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1420 			nat->nat_inip, nat->nat_oip);
1421 		if (getlock) {
1422 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1423 		}
1424 		if (n != NULL) {
1425 			error = EEXIST;
1426 			goto junkput;
1427 		}
1428 	} else {
1429 		error = EINVAL;
1430 		goto junkput;
1431 	}
1432 
1433 	/*
1434 	 * Restore ap_session_t structure.  Include the private data allocated
1435 	 * if it was there.
1436 	 */
1437 	aps = nat->nat_aps;
1438 	if (aps != NULL) {
1439 		KMALLOC(aps, ap_session_t *);
1440 		nat->nat_aps = aps;
1441 		if (aps == NULL) {
1442 			error = ENOMEM;
1443 			goto junkput;
1444 		}
1445 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1446 		if (in != NULL)
1447 			aps->aps_apr = in->in_apr;
1448 		else
1449 			aps->aps_apr = NULL;
1450 		if (aps->aps_psiz != 0) {
1451 			if (aps->aps_psiz > 81920) {
1452 				error = ENOMEM;
1453 				goto junkput;
1454 			}
1455 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1456 			if (aps->aps_data == NULL) {
1457 				error = ENOMEM;
1458 				goto junkput;
1459 			}
1460 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1461 			      aps->aps_psiz);
1462 		} else {
1463 			aps->aps_psiz = 0;
1464 			aps->aps_data = NULL;
1465 		}
1466 	}
1467 
1468 	/*
1469 	 * If there was a filtering rule associated with this entry then
1470 	 * build up a new one.
1471 	 */
1472 	fr = nat->nat_fr;
1473 	if (fr != NULL) {
1474 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1475 			KMALLOC(fr, frentry_t *);
1476 			nat->nat_fr = fr;
1477 			if (fr == NULL) {
1478 				error = ENOMEM;
1479 				goto junkput;
1480 			}
1481 			ipnn->ipn_nat.nat_fr = fr;
1482 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1483 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1484 
1485 			fr->fr_ref = 1;
1486 			fr->fr_dsize = 0;
1487 			fr->fr_data = NULL;
1488 			fr->fr_type = FR_T_NONE;
1489 
1490 			MUTEX_NUKE(&fr->fr_lock);
1491 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1492 		} else {
1493 			if (getlock) {
1494 				READ_ENTER(&ifs->ifs_ipf_nat);
1495 			}
1496 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1497 				if (n->nat_fr == fr)
1498 					break;
1499 
1500 			if (n != NULL) {
1501 				MUTEX_ENTER(&fr->fr_lock);
1502 				fr->fr_ref++;
1503 				MUTEX_EXIT(&fr->fr_lock);
1504 			}
1505 			if (getlock) {
1506 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1507 			}
1508 			if (!n) {
1509 				error = ESRCH;
1510 				goto junkput;
1511 			}
1512 		}
1513 	}
1514 
1515 	if (ipnn != &ipn) {
1516 		KFREES(ipnn, ipn.ipn_dsize);
1517 		ipnn = NULL;
1518 	}
1519 
1520 	if (getlock) {
1521 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1522 	}
1523 	error = nat_insert(nat, nat->nat_rev, ifs);
1524 	if ((error == 0) && (aps != NULL)) {
1525 		aps->aps_next = ifs->ifs_ap_sess_list;
1526 		ifs->ifs_ap_sess_list = aps;
1527 	}
1528 	if (getlock) {
1529 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1530 	}
1531 
1532 	if (error == 0)
1533 		return 0;
1534 
1535 	error = ENOMEM;
1536 
1537 junkput:
1538 	if (fr != NULL)
1539 		(void) fr_derefrule(&fr, ifs);
1540 
1541 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1542 		KFREES(ipnn, ipn.ipn_dsize);
1543 	}
1544 	if (nat != NULL) {
1545 		if (aps != NULL) {
1546 			if (aps->aps_data != NULL) {
1547 				KFREES(aps->aps_data, aps->aps_psiz);
1548 			}
1549 			KFREE(aps);
1550 		}
1551 		if (in != NULL) {
1552 			if (in->in_apr)
1553 				appr_free(in->in_apr);
1554 			KFREE(in);
1555 		}
1556 		KFREE(nat);
1557 	}
1558 	return error;
1559 }
1560 
1561 
1562 /* ------------------------------------------------------------------------ */
1563 /* Function:    nat_delete                                                  */
1564 /* Returns:     Nil                                                         */
1565 /* Parameters:  natd(I)    - pointer to NAT structure to delete             */
1566 /*              logtype(I) - type of LOG record to create before deleting   */
1567 /* Write Lock:  ipf_nat                                                     */
1568 /*                                                                          */
1569 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1570 /* enabled then generate a NAT log record for this event.                   */
1571 /* ------------------------------------------------------------------------ */
1572 static void nat_delete(nat, logtype, ifs)
1573 struct nat *nat;
1574 int logtype;
1575 ipf_stack_t *ifs;
1576 {
1577 	struct ipnat *ipn;
1578 
1579 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1580 		nat_log(nat, logtype, ifs);
1581 
1582 	/*
1583 	 * Take it as a general indication that all the pointers are set if
1584 	 * nat_pnext is set.
1585 	 */
1586 	if (nat->nat_pnext != NULL) {
1587 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1588 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1589 
1590 		*nat->nat_pnext = nat->nat_next;
1591 		if (nat->nat_next != NULL) {
1592 			nat->nat_next->nat_pnext = nat->nat_pnext;
1593 			nat->nat_next = NULL;
1594 		}
1595 		nat->nat_pnext = NULL;
1596 
1597 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1598 		if (nat->nat_hnext[0] != NULL) {
1599 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1600 			nat->nat_hnext[0] = NULL;
1601 		}
1602 		nat->nat_phnext[0] = NULL;
1603 
1604 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1605 		if (nat->nat_hnext[1] != NULL) {
1606 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1607 			nat->nat_hnext[1] = NULL;
1608 		}
1609 		nat->nat_phnext[1] = NULL;
1610 
1611 		if ((nat->nat_flags & SI_WILDP) != 0)
1612 			ifs->ifs_nat_stats.ns_wilds--;
1613 	}
1614 
1615 	if (nat->nat_me != NULL) {
1616 		*nat->nat_me = NULL;
1617 		nat->nat_me = NULL;
1618 	}
1619 
1620 	fr_deletequeueentry(&nat->nat_tqe);
1621 
1622 	MUTEX_ENTER(&nat->nat_lock);
1623 	if (nat->nat_ref > 1) {
1624 		nat->nat_ref--;
1625 		MUTEX_EXIT(&nat->nat_lock);
1626 		return;
1627 	}
1628 	MUTEX_EXIT(&nat->nat_lock);
1629 
1630 	/*
1631 	 * At this point, nat_ref is 1, doing "--" would make it 0..
1632 	 */
1633 	nat->nat_ref = 0;
1634 
1635 #ifdef	IPFILTER_SYNC
1636 	if (nat->nat_sync)
1637 		ipfsync_del(nat->nat_sync);
1638 #endif
1639 
1640 	if (nat->nat_fr != NULL)
1641 		(void)fr_derefrule(&nat->nat_fr, ifs);
1642 
1643 	if (nat->nat_hm != NULL)
1644 		fr_hostmapdel(&nat->nat_hm);
1645 
1646 	/*
1647 	 * If there is an active reference from the nat entry to its parent
1648 	 * rule, decrement the rule's reference count and free it too if no
1649 	 * longer being used.
1650 	 */
1651 	ipn = nat->nat_ptr;
1652 	if (ipn != NULL) {
1653 		ipn->in_space++;
1654 		ipn->in_use--;
1655 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
1656 			if (ipn->in_apr)
1657 				appr_free(ipn->in_apr);
1658 			KFREE(ipn);
1659 			ifs->ifs_nat_stats.ns_rules--;
1660 		}
1661 	}
1662 
1663 	MUTEX_DESTROY(&nat->nat_lock);
1664 
1665 	aps_free(nat->nat_aps, ifs);
1666 	ifs->ifs_nat_stats.ns_inuse--;
1667 
1668 	/*
1669 	 * If there's a fragment table entry too for this nat entry, then
1670 	 * dereference that as well.  This is after nat_lock is released
1671 	 * because of Tru64.
1672 	 */
1673 	fr_forgetnat((void *)nat, ifs);
1674 
1675 	KFREE(nat);
1676 }
1677 
1678 
1679 /* ------------------------------------------------------------------------ */
1680 /* Function:    nat_flushtable                                              */
1681 /* Returns:     int - number of NAT rules deleted                           */
1682 /* Parameters:  Nil                                                         */
1683 /*                                                                          */
1684 /* Deletes all currently active NAT sessions.  In deleting each NAT entry a */
1685 /* log record should be emitted in nat_delete() if NAT logging is enabled.  */
1686 /* ------------------------------------------------------------------------ */
1687 /*
1688  * nat_flushtable - clear the NAT table of all mapping entries.
1689  */
1690 static int nat_flushtable(ifs)
1691 ipf_stack_t *ifs;
1692 {
1693 	nat_t *nat;
1694 	int j = 0;
1695 
1696 	/*
1697 	 * ALL NAT mappings deleted, so lets just make the deletions
1698 	 * quicker.
1699 	 */
1700 	if (ifs->ifs_nat_table[0] != NULL)
1701 		bzero((char *)ifs->ifs_nat_table[0],
1702 		      sizeof(ifs->ifs_nat_table[0]) * ifs->ifs_ipf_nattable_sz);
1703 	if (ifs->ifs_nat_table[1] != NULL)
1704 		bzero((char *)ifs->ifs_nat_table[1],
1705 		      sizeof(ifs->ifs_nat_table[1]) * ifs->ifs_ipf_nattable_sz);
1706 
1707 	while ((nat = ifs->ifs_nat_instances) != NULL) {
1708 		nat_delete(nat, NL_FLUSH, ifs);
1709 		j++;
1710 	}
1711 
1712 	ifs->ifs_nat_stats.ns_inuse = 0;
1713 	return j;
1714 }
1715 
1716 
1717 /* ------------------------------------------------------------------------ */
1718 /* Function:    nat_clearlist                                               */
1719 /* Returns:     int - number of NAT/RDR rules deleted                       */
1720 /* Parameters:  Nil                                                         */
1721 /*                                                                          */
1722 /* Delete all rules in the current list of rules.  There is nothing elegant */
1723 /* about this cleanup: simply free all entries on the list of rules and     */
1724 /* clear out the tables used for hashed NAT rule lookups.                   */
1725 /* ------------------------------------------------------------------------ */
1726 static int nat_clearlist(ifs)
1727 ipf_stack_t *ifs;
1728 {
1729 	ipnat_t *n, **np = &ifs->ifs_nat_list;
1730 	int i = 0;
1731 
1732 	if (ifs->ifs_nat_rules != NULL)
1733 		bzero((char *)ifs->ifs_nat_rules,
1734 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
1735 	if (ifs->ifs_rdr_rules != NULL)
1736 		bzero((char *)ifs->ifs_rdr_rules,
1737 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
1738 
1739 	while ((n = *np) != NULL) {
1740 		*np = n->in_next;
1741 		if (n->in_use == 0) {
1742 			if (n->in_apr != NULL)
1743 				appr_free(n->in_apr);
1744 			KFREE(n);
1745 			ifs->ifs_nat_stats.ns_rules--;
1746 		} else {
1747 			n->in_flags |= IPN_DELETE;
1748 			n->in_next = NULL;
1749 		}
1750 		i++;
1751 	}
1752 	ifs->ifs_nat_masks = 0;
1753 	ifs->ifs_rdr_masks = 0;
1754 	return i;
1755 }
1756 
1757 
1758 /* ------------------------------------------------------------------------ */
1759 /* Function:    nat_newmap                                                  */
1760 /* Returns:     int - -1 == error, 0 == success                             */
1761 /* Parameters:  fin(I) - pointer to packet information                      */
1762 /*              nat(I) - pointer to NAT entry                               */
1763 /*              ni(I)  - pointer to structure with misc. information needed */
1764 /*                       to create new NAT entry.                           */
1765 /*                                                                          */
1766 /* Given an empty NAT structure, populate it with new information about a   */
1767 /* new NAT session, as defined by the matching NAT rule.                    */
1768 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
1769 /* to the new IP address for the translation.                               */
1770 /* ------------------------------------------------------------------------ */
1771 static INLINE int nat_newmap(fin, nat, ni)
1772 fr_info_t *fin;
1773 nat_t *nat;
1774 natinfo_t *ni;
1775 {
1776 	u_short st_port, dport, sport, port, sp, dp;
1777 	struct in_addr in, inb;
1778 	hostmap_t *hm;
1779 	u_32_t flags;
1780 	u_32_t st_ip;
1781 	ipnat_t *np;
1782 	nat_t *natl;
1783 	int l;
1784 	ipf_stack_t *ifs = fin->fin_ifs;
1785 
1786 	/*
1787 	 * If it's an outbound packet which doesn't match any existing
1788 	 * record, then create a new port
1789 	 */
1790 	l = 0;
1791 	hm = NULL;
1792 	np = ni->nai_np;
1793 	st_ip = np->in_nip;
1794 	st_port = np->in_pnext;
1795 	flags = ni->nai_flags;
1796 	sport = ni->nai_sport;
1797 	dport = ni->nai_dport;
1798 
1799 	/*
1800 	 * Do a loop until we either run out of entries to try or we find
1801 	 * a NAT mapping that isn't currently being used.  This is done
1802 	 * because the change to the source is not (usually) being fixed.
1803 	 */
1804 	do {
1805 		port = 0;
1806 		in.s_addr = htonl(np->in_nip);
1807 		if (l == 0) {
1808 			/*
1809 			 * Check to see if there is an existing NAT
1810 			 * setup for this IP address pair.
1811 			 */
1812 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1813 					 in, 0, ifs);
1814 			if (hm != NULL)
1815 				in.s_addr = hm->hm_mapip.s_addr;
1816 		} else if ((l == 1) && (hm != NULL)) {
1817 			fr_hostmapdel(&hm);
1818 		}
1819 		in.s_addr = ntohl(in.s_addr);
1820 
1821 		nat->nat_hm = hm;
1822 
1823 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
1824 			if (l > 0)
1825 				return -1;
1826 		}
1827 
1828 		if (np->in_redir == NAT_BIMAP &&
1829 		    np->in_inmsk == np->in_outmsk) {
1830 			/*
1831 			 * map the address block in a 1:1 fashion
1832 			 */
1833 			in.s_addr = np->in_outip;
1834 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
1835 			in.s_addr = ntohl(in.s_addr);
1836 
1837 		} else if (np->in_redir & NAT_MAPBLK) {
1838 			if ((l >= np->in_ppip) || ((l > 0) &&
1839 			     !(flags & IPN_TCPUDP)))
1840 				return -1;
1841 			/*
1842 			 * map-block - Calculate destination address.
1843 			 */
1844 			in.s_addr = ntohl(fin->fin_saddr);
1845 			in.s_addr &= ntohl(~np->in_inmsk);
1846 			inb.s_addr = in.s_addr;
1847 			in.s_addr /= np->in_ippip;
1848 			in.s_addr &= ntohl(~np->in_outmsk);
1849 			in.s_addr += ntohl(np->in_outip);
1850 			/*
1851 			 * Calculate destination port.
1852 			 */
1853 			if ((flags & IPN_TCPUDP) &&
1854 			    (np->in_ppip != 0)) {
1855 				port = ntohs(sport) + l;
1856 				port %= np->in_ppip;
1857 				port += np->in_ppip *
1858 					(inb.s_addr % np->in_ippip);
1859 				port += MAPBLK_MINPORT;
1860 				port = htons(port);
1861 			}
1862 
1863 		} else if ((np->in_outip == 0) &&
1864 			   (np->in_outmsk == 0xffffffff)) {
1865 			/*
1866 			 * 0/32 - use the interface's IP address.
1867 			 */
1868 			if ((l > 0) ||
1869 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
1870 				       &in, NULL, fin->fin_ifs) == -1)
1871 				return -1;
1872 			in.s_addr = ntohl(in.s_addr);
1873 
1874 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
1875 			/*
1876 			 * 0/0 - use the original source address/port.
1877 			 */
1878 			if (l > 0)
1879 				return -1;
1880 			in.s_addr = ntohl(fin->fin_saddr);
1881 
1882 		} else if ((np->in_outmsk != 0xffffffff) &&
1883 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
1884 			np->in_nip++;
1885 
1886 		natl = NULL;
1887 
1888 		if ((flags & IPN_TCPUDP) &&
1889 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
1890 		    (np->in_flags & IPN_AUTOPORTMAP)) {
1891 			/*
1892 			 * "ports auto" (without map-block)
1893 			 */
1894 			if ((l > 0) && (l % np->in_ppip == 0)) {
1895 				if (l > np->in_space) {
1896 					return -1;
1897 				} else if ((l > np->in_ppip) &&
1898 					   np->in_outmsk != 0xffffffff)
1899 					np->in_nip++;
1900 			}
1901 			if (np->in_ppip != 0) {
1902 				port = ntohs(sport);
1903 				port += (l % np->in_ppip);
1904 				port %= np->in_ppip;
1905 				port += np->in_ppip *
1906 					(ntohl(fin->fin_saddr) %
1907 					 np->in_ippip);
1908 				port += MAPBLK_MINPORT;
1909 				port = htons(port);
1910 			}
1911 
1912 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1913 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
1914 			/*
1915 			 * Standard port translation.  Select next port.
1916 			 */
1917 			port = htons(np->in_pnext++);
1918 
1919 			if (np->in_pnext > ntohs(np->in_pmax)) {
1920 				np->in_pnext = ntohs(np->in_pmin);
1921 				if (np->in_outmsk != 0xffffffff)
1922 					np->in_nip++;
1923 			}
1924 		}
1925 
1926 		if (np->in_flags & IPN_IPRANGE) {
1927 			if (np->in_nip > ntohl(np->in_outmsk))
1928 				np->in_nip = ntohl(np->in_outip);
1929 		} else {
1930 			if ((np->in_outmsk != 0xffffffff) &&
1931 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1932 			    ntohl(np->in_outip))
1933 				np->in_nip = ntohl(np->in_outip) + 1;
1934 		}
1935 
1936 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
1937 			port = sport;
1938 
1939 		/*
1940 		 * Here we do a lookup of the connection as seen from
1941 		 * the outside.  If an IP# pair already exists, try
1942 		 * again.  So if you have A->B becomes C->B, you can
1943 		 * also have D->E become C->E but not D->B causing
1944 		 * another C->B.  Also take protocol and ports into
1945 		 * account when determining whether a pre-existing
1946 		 * NAT setup will cause an external conflict where
1947 		 * this is appropriate.
1948 		 */
1949 		inb.s_addr = htonl(in.s_addr);
1950 		sp = fin->fin_data[0];
1951 		dp = fin->fin_data[1];
1952 		fin->fin_data[0] = fin->fin_data[1];
1953 		fin->fin_data[1] = htons(port);
1954 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
1955 				    (u_int)fin->fin_p, fin->fin_dst, inb);
1956 		fin->fin_data[0] = sp;
1957 		fin->fin_data[1] = dp;
1958 
1959 		/*
1960 		 * Has the search wrapped around and come back to the
1961 		 * start ?
1962 		 */
1963 		if ((natl != NULL) &&
1964 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1965 		    (np->in_nip != 0) && (st_ip == np->in_nip))
1966 			return -1;
1967 		l++;
1968 	} while (natl != NULL);
1969 
1970 	if (np->in_space > 0)
1971 		np->in_space--;
1972 
1973 	/* Setup the NAT table */
1974 	nat->nat_inip = fin->fin_src;
1975 	nat->nat_outip.s_addr = htonl(in.s_addr);
1976 	nat->nat_oip = fin->fin_dst;
1977 	if (nat->nat_hm == NULL)
1978 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
1979 					  nat->nat_outip, 0, ifs);
1980 
1981 	/*
1982 	 * The ICMP checksum does not have a pseudo header containing
1983 	 * the IP addresses
1984 	 */
1985 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1986 	ni->nai_sum2 = LONG_SUM(in.s_addr);
1987 	if ((flags & IPN_TCPUDP)) {
1988 		ni->nai_sum1 += ntohs(sport);
1989 		ni->nai_sum2 += ntohs(port);
1990 	}
1991 
1992 	if (flags & IPN_TCPUDP) {
1993 		nat->nat_inport = sport;
1994 		nat->nat_outport = port;	/* sport */
1995 		nat->nat_oport = dport;
1996 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
1997 	} else if (flags & IPN_ICMPQUERY) {
1998 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
1999 		nat->nat_inport = port;
2000 		nat->nat_outport = port;
2001 	}
2002 
2003 	ni->nai_ip.s_addr = in.s_addr;
2004 	ni->nai_port = port;
2005 	ni->nai_nport = dport;
2006 	return 0;
2007 }
2008 
2009 
2010 /* ------------------------------------------------------------------------ */
2011 /* Function:    nat_newrdr                                                  */
2012 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2013 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2014 /* Parameters:  fin(I) - pointer to packet information                      */
2015 /*              nat(I) - pointer to NAT entry                               */
2016 /*              ni(I)  - pointer to structure with misc. information needed */
2017 /*                       to create new NAT entry.                           */
2018 /*                                                                          */
2019 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2020 /* to the new IP address for the translation.                               */
2021 /* ------------------------------------------------------------------------ */
2022 static INLINE int nat_newrdr(fin, nat, ni)
2023 fr_info_t *fin;
2024 nat_t *nat;
2025 natinfo_t *ni;
2026 {
2027 	u_short nport, dport, sport;
2028 	struct in_addr in;
2029 	hostmap_t *hm;
2030 	u_32_t flags;
2031 	ipnat_t *np;
2032 	int move;
2033 	ipf_stack_t *ifs = fin->fin_ifs;
2034 
2035 	move = 1;
2036 	hm = NULL;
2037 	in.s_addr = 0;
2038 	np = ni->nai_np;
2039 	flags = ni->nai_flags;
2040 	sport = ni->nai_sport;
2041 	dport = ni->nai_dport;
2042 
2043 	/*
2044 	 * If the matching rule has IPN_STICKY set, then we want to have the
2045 	 * same rule kick in as before.  Why would this happen?  If you have
2046 	 * a collection of rdr rules with "round-robin sticky", the current
2047 	 * packet might match a different one to the previous connection but
2048 	 * we want the same destination to be used.
2049 	 */
2050 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2051 	    (IPN_ROUNDR|IPN_STICKY)) {
2052 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2053 				 (u_32_t)dport, ifs);
2054 		if (hm != NULL) {
2055 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2056 			np = hm->hm_ipnat;
2057 			ni->nai_np = np;
2058 			move = 0;
2059 		}
2060 	}
2061 
2062 	/*
2063 	 * Otherwise, it's an inbound packet. Most likely, we don't
2064 	 * want to rewrite source ports and source addresses. Instead,
2065 	 * we want to rewrite to a fixed internal address and fixed
2066 	 * internal port.
2067 	 */
2068 	if (np->in_flags & IPN_SPLIT) {
2069 		in.s_addr = np->in_nip;
2070 
2071 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2072 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2073 					 in, (u_32_t)dport, ifs);
2074 			if (hm != NULL) {
2075 				in.s_addr = hm->hm_mapip.s_addr;
2076 				move = 0;
2077 			}
2078 		}
2079 
2080 		if (hm == NULL || hm->hm_ref == 1) {
2081 			if (np->in_inip == htonl(in.s_addr)) {
2082 				np->in_nip = ntohl(np->in_inmsk);
2083 				move = 0;
2084 			} else {
2085 				np->in_nip = ntohl(np->in_inip);
2086 			}
2087 		}
2088 
2089 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2090 		/*
2091 		 * 0/32 - use the interface's IP address.
2092 		 */
2093 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2094 			   fin->fin_ifs) == -1)
2095 			return -1;
2096 		in.s_addr = ntohl(in.s_addr);
2097 
2098 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2099 		/*
2100 		 * 0/0 - use the original destination address/port.
2101 		 */
2102 		in.s_addr = ntohl(fin->fin_daddr);
2103 
2104 	} else if (np->in_redir == NAT_BIMAP &&
2105 		   np->in_inmsk == np->in_outmsk) {
2106 		/*
2107 		 * map the address block in a 1:1 fashion
2108 		 */
2109 		in.s_addr = np->in_inip;
2110 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2111 		in.s_addr = ntohl(in.s_addr);
2112 	} else {
2113 		in.s_addr = ntohl(np->in_inip);
2114 	}
2115 
2116 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2117 		nport = dport;
2118 	else {
2119 		/*
2120 		 * Whilst not optimized for the case where
2121 		 * pmin == pmax, the gain is not significant.
2122 		 */
2123 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2124 		    (np->in_pmin != np->in_pmax)) {
2125 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2126 				ntohs(np->in_pnext);
2127 			nport = htons(nport);
2128 		} else
2129 			nport = np->in_pnext;
2130 	}
2131 
2132 	/*
2133 	 * When the redirect-to address is set to 0.0.0.0, just
2134 	 * assume a blank `forwarding' of the packet.  We don't
2135 	 * setup any translation for this either.
2136 	 */
2137 	if (in.s_addr == 0) {
2138 		if (nport == dport)
2139 			return -1;
2140 		in.s_addr = ntohl(fin->fin_daddr);
2141 	}
2142 
2143 	nat->nat_inip.s_addr = htonl(in.s_addr);
2144 	nat->nat_outip = fin->fin_dst;
2145 	nat->nat_oip = fin->fin_src;
2146 
2147 	ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
2148 	ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
2149 
2150 	ni->nai_ip.s_addr = in.s_addr;
2151 	ni->nai_nport = nport;
2152 	ni->nai_port = sport;
2153 
2154 	if (flags & IPN_TCPUDP) {
2155 		nat->nat_inport = nport;
2156 		nat->nat_outport = dport;
2157 		nat->nat_oport = sport;
2158 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2159 	} else if (flags & IPN_ICMPQUERY) {
2160 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2161 		nat->nat_inport = nport;
2162 		nat->nat_outport = nport;
2163 	}
2164 
2165 	return move;
2166 }
2167 
2168 /* ------------------------------------------------------------------------ */
2169 /* Function:    nat_new                                                     */
2170 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2171 /*                       else pointer to new NAT structure                  */
2172 /* Parameters:  fin(I)       - pointer to packet information                */
2173 /*              np(I)        - pointer to NAT rule                          */
2174 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2175 /*              flags(I)     - flags describing the current packet          */
2176 /*              direction(I) - direction of packet (in/out)                 */
2177 /* Write Lock:  ipf_nat                                                     */
2178 /*                                                                          */
2179 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2180 /* in any way.                                                              */
2181 /*                                                                          */
2182 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2183 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2184 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2185 /* and (3) building that structure and putting it into the NAT table(s).    */
2186 /* ------------------------------------------------------------------------ */
2187 nat_t *nat_new(fin, np, natsave, flags, direction)
2188 fr_info_t *fin;
2189 ipnat_t *np;
2190 nat_t **natsave;
2191 u_int flags;
2192 int direction;
2193 {
2194 	u_short port = 0, sport = 0, dport = 0, nport = 0;
2195 	tcphdr_t *tcp = NULL;
2196 	hostmap_t *hm = NULL;
2197 	struct in_addr in;
2198 	nat_t *nat, *natl;
2199 	u_int nflags;
2200 	natinfo_t ni;
2201 	u_32_t sumd;
2202 	int move;
2203 	ipf_stack_t *ifs = fin->fin_ifs;
2204 
2205 	/*
2206 	 * Trigger automatic call to nat_extraflush() if the
2207 	 * table has reached capcity specified by hi watermark.
2208 	 */
2209 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_hi)
2210 		ifs->ifs_nat_doflush = 1;
2211 
2212 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2213 		ifs->ifs_nat_stats.ns_memfail++;
2214 		return NULL;
2215 	}
2216 
2217 	move = 1;
2218 	nflags = np->in_flags & flags;
2219 	nflags &= NAT_FROMRULE;
2220 
2221 	ni.nai_np = np;
2222 	ni.nai_nflags = nflags;
2223 	ni.nai_flags = flags;
2224 
2225 	/* Give me a new nat */
2226 	KMALLOC(nat, nat_t *);
2227 	if (nat == NULL) {
2228 		ifs->ifs_nat_stats.ns_memfail++;
2229 		/*
2230 		 * Try to automatically tune the max # of entries in the
2231 		 * table allowed to be less than what will cause kmem_alloc()
2232 		 * to fail and try to eliminate panics due to out of memory
2233 		 * conditions arising.
2234 		 */
2235 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2236 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2237 			printf("ipf_nattable_max reduced to %d\n",
2238 				ifs->ifs_ipf_nattable_max);
2239 		}
2240 		return NULL;
2241 	}
2242 
2243 	if (flags & IPN_TCPUDP) {
2244 		tcp = fin->fin_dp;
2245 		ni.nai_sport = htons(fin->fin_sport);
2246 		ni.nai_dport = htons(fin->fin_dport);
2247 	} else if (flags & IPN_ICMPQUERY) {
2248 		/*
2249 		 * In the ICMP query NAT code, we translate the ICMP id fields
2250 		 * to make them unique. This is indepedent of the ICMP type
2251 		 * (e.g. in the unlikely event that a host sends an echo and
2252 		 * an tstamp request with the same id, both packets will have
2253 		 * their ip address/id field changed in the same way).
2254 		 */
2255 		/* The icmp_id field is used by the sender to identify the
2256 		 * process making the icmp request. (the receiver justs
2257 		 * copies it back in its response). So, it closely matches
2258 		 * the concept of source port. We overlay sport, so we can
2259 		 * maximally reuse the existing code.
2260 		 */
2261 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2262 		ni.nai_dport = ni.nai_sport;
2263 	}
2264 
2265 	bzero((char *)nat, sizeof(*nat));
2266 	nat->nat_flags = flags;
2267 	nat->nat_redir = np->in_redir;
2268 
2269 	if ((flags & NAT_SLAVE) == 0) {
2270 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2271 	}
2272 
2273 	/*
2274 	 * Search the current table for a match.
2275 	 */
2276 	if (direction == NAT_OUTBOUND) {
2277 		/*
2278 		 * We can now arrange to call this for the same connection
2279 		 * because ipf_nat_new doesn't protect the code path into
2280 		 * this function.
2281 		 */
2282 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2283 				     fin->fin_src, fin->fin_dst);
2284 		if (natl != NULL) {
2285 			KFREE(nat);
2286 			nat = natl;
2287 			goto done;
2288 		}
2289 
2290 		move = nat_newmap(fin, nat, &ni);
2291 		if (move == -1)
2292 			goto badnat;
2293 
2294 		np = ni.nai_np;
2295 		in = ni.nai_ip;
2296 	} else {
2297 		/*
2298 		 * NAT_INBOUND is used only for redirects rules
2299 		 */
2300 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2301 				    fin->fin_src, fin->fin_dst);
2302 		if (natl != NULL) {
2303 			KFREE(nat);
2304 			nat = natl;
2305 			goto done;
2306 		}
2307 
2308 		move = nat_newrdr(fin, nat, &ni);
2309 		if (move == -1)
2310 			goto badnat;
2311 
2312 		np = ni.nai_np;
2313 		in = ni.nai_ip;
2314 	}
2315 	port = ni.nai_port;
2316 	nport = ni.nai_nport;
2317 
2318 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2319 		if (np->in_redir == NAT_REDIRECT) {
2320 			nat_delrdr(np);
2321 			nat_addrdr(np, ifs);
2322 		} else if (np->in_redir == NAT_MAP) {
2323 			nat_delnat(np);
2324 			nat_addnat(np, ifs);
2325 		}
2326 	}
2327 
2328 	if (flags & IPN_TCPUDP) {
2329 		sport = ni.nai_sport;
2330 		dport = ni.nai_dport;
2331 	} else if (flags & IPN_ICMPQUERY) {
2332 		sport = ni.nai_sport;
2333 		dport = 0;
2334 	}
2335 
2336 	/*
2337 	 * nat_sumd[0] stores adjustment value including both IP address and
2338 	 * port number changes. nat_sumd[1] stores adjustment value only for
2339 	 * IP address changes, to be used for pseudo header adjustment, in
2340 	 * case hardware partial checksum offload is offered.
2341 	 */
2342 	CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2343 	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2344 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
2345 	if (flags & IPN_TCPUDP) {
2346 		ni.nai_sum1 = LONG_SUM(in.s_addr);
2347 		if (direction == NAT_OUTBOUND)
2348 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_saddr));
2349 		else
2350 			ni.nai_sum2 = LONG_SUM(ntohl(fin->fin_daddr));
2351 
2352 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2353 		nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
2354 	} else
2355 #endif
2356 		nat->nat_sumd[1] = nat->nat_sumd[0];
2357 
2358 	if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) {
2359 		if (direction == NAT_OUTBOUND)
2360 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr));
2361 		else
2362 			ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr));
2363 
2364 		ni.nai_sum2 = LONG_SUM(in.s_addr);
2365 
2366 		CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd);
2367 		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
2368 	} else {
2369 		nat->nat_ipsumd = nat->nat_sumd[0];
2370 		if (!(flags & IPN_TCPUDPICMP)) {
2371 			nat->nat_sumd[0] = 0;
2372 			nat->nat_sumd[1] = 0;
2373 		}
2374 	}
2375 
2376 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2377 		goto badnat;
2378 	}
2379 	if (flags & SI_WILDP)
2380 		ifs->ifs_nat_stats.ns_wilds++;
2381 	goto done;
2382 badnat:
2383 	ifs->ifs_nat_stats.ns_badnat++;
2384 	if ((hm = nat->nat_hm) != NULL)
2385 		fr_hostmapdel(&hm);
2386 	KFREE(nat);
2387 	nat = NULL;
2388 done:
2389 	if ((flags & NAT_SLAVE) == 0) {
2390 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2391 	}
2392 	return nat;
2393 }
2394 
2395 
2396 /* ------------------------------------------------------------------------ */
2397 /* Function:    nat_finalise                                                */
2398 /* Returns:     int - 0 == sucess, -1 == failure                            */
2399 /* Parameters:  fin(I) - pointer to packet information                      */
2400 /*              nat(I) - pointer to NAT entry                               */
2401 /*              ni(I)  - pointer to structure with misc. information needed */
2402 /*                       to create new NAT entry.                           */
2403 /* Write Lock:  ipf_nat                                                     */
2404 /*                                                                          */
2405 /* This is the tail end of constructing a new NAT entry and is the same     */
2406 /* for both IPv4 and IPv6.                                                  */
2407 /* ------------------------------------------------------------------------ */
2408 /*ARGSUSED*/
2409 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2410 fr_info_t *fin;
2411 nat_t *nat;
2412 natinfo_t *ni;
2413 tcphdr_t *tcp;
2414 nat_t **natsave;
2415 int direction;
2416 {
2417 	frentry_t *fr;
2418 	ipnat_t *np;
2419 	ipf_stack_t *ifs = fin->fin_ifs;
2420 
2421 	np = ni->nai_np;
2422 
2423 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2424 
2425 #ifdef	IPFILTER_SYNC
2426 	if ((nat->nat_flags & SI_CLONE) == 0)
2427 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2428 #endif
2429 
2430 	nat->nat_me = natsave;
2431 	nat->nat_dir = direction;
2432 	nat->nat_ifps[0] = np->in_ifps[0];
2433 	nat->nat_ifps[1] = np->in_ifps[1];
2434 	nat->nat_ptr = np;
2435 	nat->nat_p = fin->fin_p;
2436 	nat->nat_mssclamp = np->in_mssclamp;
2437 	fr = fin->fin_fr;
2438 	nat->nat_fr = fr;
2439 
2440 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2441 		if (appr_new(fin, nat) == -1)
2442 			return -1;
2443 
2444 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2445 		if (ifs->ifs_nat_logging)
2446 			nat_log(nat, (u_int)np->in_redir, ifs);
2447 		np->in_use++;
2448 		if (fr != NULL) {
2449 			MUTEX_ENTER(&fr->fr_lock);
2450 			fr->fr_ref++;
2451 			MUTEX_EXIT(&fr->fr_lock);
2452 		}
2453 		return 0;
2454 	}
2455 
2456 	/*
2457 	 * nat_insert failed, so cleanup time...
2458 	 */
2459 	return -1;
2460 }
2461 
2462 
2463 /* ------------------------------------------------------------------------ */
2464 /* Function:   nat_insert                                                   */
2465 /* Returns:    int - 0 == sucess, -1 == failure                             */
2466 /* Parameters: nat(I) - pointer to NAT structure                            */
2467 /*             rev(I) - flag indicating forward/reverse direction of packet */
2468 /* Write Lock: ipf_nat                                                      */
2469 /*                                                                          */
2470 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2471 /* list of active NAT entries.  Adjust global counters when complete.       */
2472 /* ------------------------------------------------------------------------ */
2473 int	nat_insert(nat, rev, ifs)
2474 nat_t	*nat;
2475 int	rev;
2476 ipf_stack_t *ifs;
2477 {
2478 	u_int hv1, hv2;
2479 	nat_t **natp;
2480 
2481 	/*
2482 	 * Try and return an error as early as possible, so calculate the hash
2483 	 * entry numbers first and then proceed.
2484 	 */
2485 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2486 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2487 				  0xffffffff);
2488 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2489 				  ifs->ifs_ipf_nattable_sz);
2490 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2491 				  0xffffffff);
2492 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2493 				  ifs->ifs_ipf_nattable_sz);
2494 	} else {
2495 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2496 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2497 				  ifs->ifs_ipf_nattable_sz);
2498 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2499 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2500 				  ifs->ifs_ipf_nattable_sz);
2501 	}
2502 
2503 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2504 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2505 		return -1;
2506 	}
2507 
2508 	nat->nat_hv[0] = hv1;
2509 	nat->nat_hv[1] = hv2;
2510 
2511 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2512 
2513 	nat->nat_rev = rev;
2514 	nat->nat_ref = 1;
2515 	nat->nat_bytes[0] = 0;
2516 	nat->nat_pkts[0] = 0;
2517 	nat->nat_bytes[1] = 0;
2518 	nat->nat_pkts[1] = 0;
2519 
2520 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2521 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2522 
2523 	if (nat->nat_ifnames[1][0] !='\0') {
2524 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2525 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2526 	} else {
2527 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2528 			       LIFNAMSIZ);
2529 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2530 		nat->nat_ifps[1] = nat->nat_ifps[0];
2531 	}
2532 
2533 	nat->nat_next = ifs->ifs_nat_instances;
2534 	nat->nat_pnext = &ifs->ifs_nat_instances;
2535 	if (ifs->ifs_nat_instances)
2536 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2537 	ifs->ifs_nat_instances = nat;
2538 
2539 	natp = &ifs->ifs_nat_table[0][hv1];
2540 	if (*natp)
2541 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2542 	nat->nat_phnext[0] = natp;
2543 	nat->nat_hnext[0] = *natp;
2544 	*natp = nat;
2545 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2546 
2547 	natp = &ifs->ifs_nat_table[1][hv2];
2548 	if (*natp)
2549 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2550 	nat->nat_phnext[1] = natp;
2551 	nat->nat_hnext[1] = *natp;
2552 	*natp = nat;
2553 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2554 
2555 	fr_setnatqueue(nat, rev, ifs);
2556 
2557 	ifs->ifs_nat_stats.ns_added++;
2558 	ifs->ifs_nat_stats.ns_inuse++;
2559 	return 0;
2560 }
2561 
2562 
2563 /* ------------------------------------------------------------------------ */
2564 /* Function:    nat_icmperrorlookup                                         */
2565 /* Returns:     nat_t* - point to matching NAT structure                    */
2566 /* Parameters:  fin(I) - pointer to packet information                      */
2567 /*              dir(I) - direction of packet (in/out)                       */
2568 /*                                                                          */
2569 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2570 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2571 /* the required length.                                                     */
2572 /* ------------------------------------------------------------------------ */
2573 nat_t *nat_icmperrorlookup(fin, dir)
2574 fr_info_t *fin;
2575 int dir;
2576 {
2577 	int flags = 0, minlen;
2578 	icmphdr_t *orgicmp;
2579 	tcphdr_t *tcp = NULL;
2580 	u_short data[2];
2581 	nat_t *nat;
2582 	ip_t *oip;
2583 	u_int p;
2584 
2585 	/*
2586 	 * Does it at least have the return (basic) IP header ?
2587 	 * Only a basic IP header (no options) should be with an ICMP error
2588 	 * header.  Also, if it's not an error type, then return.
2589 	 */
2590 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2591 		return NULL;
2592 
2593 	/*
2594 	 * Check packet size
2595 	 */
2596 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2597 	minlen = IP_HL(oip) << 2;
2598 	if ((minlen < sizeof(ip_t)) ||
2599 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2600 		return NULL;
2601 	/*
2602 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2603 	 * header claimed in the encapsulated part which is of concern.  It
2604 	 * may be too big to be in this buffer but not so big that it's
2605 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2606 	 * This is possible because we don't know how big oip_hl is when we
2607 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2608 	 * all here now.
2609 	 */
2610 #ifdef  _KERNEL
2611 	{
2612 	mb_t *m;
2613 
2614 	m = fin->fin_m;
2615 # if defined(MENTAT)
2616 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2617 		return NULL;
2618 # else
2619 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2620 	    (char *)fin->fin_ip + M_LEN(m))
2621 		return NULL;
2622 # endif
2623 	}
2624 #endif
2625 
2626 	if (fin->fin_daddr != oip->ip_src.s_addr)
2627 		return NULL;
2628 
2629 	p = oip->ip_p;
2630 	if (p == IPPROTO_TCP)
2631 		flags = IPN_TCP;
2632 	else if (p == IPPROTO_UDP)
2633 		flags = IPN_UDP;
2634 	else if (p == IPPROTO_ICMP) {
2635 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2636 
2637 		/* see if this is related to an ICMP query */
2638 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2639 			data[0] = fin->fin_data[0];
2640 			data[1] = fin->fin_data[1];
2641 			fin->fin_data[0] = 0;
2642 			fin->fin_data[1] = orgicmp->icmp_id;
2643 
2644 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2645 			/*
2646 			 * NOTE : dir refers to the direction of the original
2647 			 *        ip packet. By definition the icmp error
2648 			 *        message flows in the opposite direction.
2649 			 */
2650 			if (dir == NAT_INBOUND)
2651 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2652 						   oip->ip_src);
2653 			else
2654 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2655 						    oip->ip_src);
2656 			fin->fin_data[0] = data[0];
2657 			fin->fin_data[1] = data[1];
2658 			return nat;
2659 		}
2660 	}
2661 
2662 	if (flags & IPN_TCPUDP) {
2663 		minlen += 8;		/* + 64bits of data to get ports */
2664 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2665 			return NULL;
2666 
2667 		data[0] = fin->fin_data[0];
2668 		data[1] = fin->fin_data[1];
2669 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2670 		fin->fin_data[0] = ntohs(tcp->th_dport);
2671 		fin->fin_data[1] = ntohs(tcp->th_sport);
2672 
2673 		if (dir == NAT_INBOUND) {
2674 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2675 					   oip->ip_src);
2676 		} else {
2677 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2678 					    oip->ip_src);
2679 		}
2680 		fin->fin_data[0] = data[0];
2681 		fin->fin_data[1] = data[1];
2682 		return nat;
2683 	}
2684 	if (dir == NAT_INBOUND)
2685 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2686 	else
2687 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2688 }
2689 
2690 
2691 /* ------------------------------------------------------------------------ */
2692 /* Function:    nat_icmperror                                               */
2693 /* Returns:     nat_t* - point to matching NAT structure                    */
2694 /* Parameters:  fin(I)    - pointer to packet information                   */
2695 /*              nflags(I) - NAT flags for this packet                       */
2696 /*              dir(I)    - direction of packet (in/out)                    */
2697 /*                                                                          */
2698 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2699 /* session.  This will correct both packet header data and checksums.       */
2700 /*                                                                          */
2701 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2702 /* a NAT'd ICMP packet gets correctly recognised.                           */
2703 /* ------------------------------------------------------------------------ */
nat_t *nat_icmperror(fin, nflags, dir)
fr_info_t *fin;
u_int *nflags;
int dir;
{
	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
	struct in_addr in;
	icmphdr_t *icmp, *orgicmp;
	int dlen;
	udphdr_t *udp;
	tcphdr_t *tcp;
	nat_t *nat;
	ip_t *oip;
	/*
	 * Packets flagged FI_SHORT or FI_FRAGBODY are never processed here;
	 * NULL is returned for them before any lookup is attempted.
	 */
	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
		return NULL;

	/*
	 * nat_icmperrorlookup() looks up nat entry associated with the
	 * offending IP packet and returns pointer to the entry, or NULL
	 * if packet wasn't natted or for `defective' packets.
	 *
	 * Only IPv4 packets are handled; anything else returns NULL without
	 * calling the lookup.
	 */

	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
		return NULL;

	/*
	 * sumd2 accumulates the correction that is applied to the outer
	 * ICMP checksum in step 3; it stays 0 if no correction is needed.
	 * *nflags is only written once a NAT entry has been found.
	 */
	sumd2 = 0;
	*nflags = IPN_ICMPERR;
	icmp = fin->fin_dp;
	/*
	 * oip is the IP header embedded in the ICMP error.  udp and tcp both
	 * point at the bytes that follow that embedded header; which view is
	 * meaningful depends on oip->ip_p.  dlen is the number of bytes from
	 * that point to the end of the packet (fin_plen less the offset of
	 * udp from fin_ip).
	 */
	oip = (ip_t *)&icmp->icmp_ip;
	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
	tcp = (tcphdr_t *)udp;
	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);

	/*
	 * Need to adjust ICMP header to include the real IP#'s and
	 * port #'s.  There are three steps required.
	 *
	 * Step 1
	 * Fix the IP addresses in the offending IP packet and update
	 * ip header checksum to compensate for the change.
	 *
	 * No update needed here for icmp_cksum because the ICMP checksum
	 * is calculated over the complete ICMP packet, which includes the
	 * changed oip IP addresses and oip->ip_sum.  These two changes
	 * cancel each other out (if the delta for the IP address is x,
	 * then the delta for ip_sum is minus x).
	 */

	/*
	 * If the embedded destination equals nat_oip, the embedded source is
	 * replaced with nat_inip; otherwise the embedded destination is
	 * replaced with nat_outip.  sum1 holds the folded old address.
	 */
	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
		in = nat->nat_inip;
		oip->ip_src = in;
	} else {
		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
		in = nat->nat_outip;
		oip->ip_dst = in;
	}

	/* sumd is the address delta; it is reused by step 2 below. */
	sum2 = LONG_SUM(ntohl(in.s_addr));
	CALC_SUMD(sum1, sum2, sumd);
	fix_datacksum(&oip->ip_sum, sumd);

	/*
	 * Step 2
	 * Perform other adjustments based on protocol of offending packet.
	 */

	switch (oip->ip_p) {
		case IPPROTO_TCP :
		case IPPROTO_UDP :

			/*
			 * For offending TCP/UDP IP packets, translate the
			 * ports based on the NAT specification.
			 *
			 * Advance notice : Now it becomes complicated :-)
			 *
			 * Since the port and IP address fields are both part
			 * of the TCP/UDP checksum of the offending IP packet,
			 * we need to adjust that checksum as well.
			 *
			 * To further complicate things, the TCP/UDP checksum
			 * may not be present.  We must check to see if the
			 * length of the data portion is big enough to hold
			 * the checksum.  In the UDP case, a test to determine
			 * if the checksum is even set is also required.
			 *
			 * Any changes to an IP address, port or checksum
			 * within the ICMP packet requires a change to
			 * icmp_cksum.
			 *
			 * Be extremely careful here ... The change is
			 * dependent upon whether or not the TCP/UDP checksum
			 * is present.
			 *
			 * If TCP/UDP checksum is present, the icmp_cksum must
			 * compensate for checksum modification resulting from
			 * IP address change only.  Port change and resulting
			 * data checksum adjustments cancel each other out.
			 *
			 * If TCP/UDP checksum is not present, icmp_cksum must
			 * compensate for port change only.  The IP address
			 * change does not modify anything else in this case.
			 */

			/*
			 * psum1/psum2 are the old/new port in host order;
			 * both remain 0 when no port is rewritten.
			 */
			psum1 = 0;
			psum2 = 0;
			psumd = 0;

			if ((tcp->th_dport == nat->nat_oport) &&
			    (tcp->th_sport != nat->nat_inport)) {

				/*
				 * Translate the source port.
				 */

				psum1 = ntohs(tcp->th_sport);
				psum2 = ntohs(nat->nat_inport);
				tcp->th_sport = nat->nat_inport;

			} else if ((tcp->th_sport == nat->nat_oport) &&
				    (tcp->th_dport != nat->nat_outport)) {

				/*
				 * Translate the destination port.
				 */

				psum1 = ntohs(tcp->th_dport);
				psum2 = ntohs(nat->nat_outport);
				tcp->th_dport = nat->nat_outport;
			}

			/*
			 * 18 bytes are needed for the TCP checksum field to
			 * be present: it sits at offset 16 in the TCP header
			 * and is 2 bytes long.
			 */
			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {

				/*
				 * TCP checksum present.
				 *
				 * Adjust data checksum and icmp checksum to
				 * compensate for any IP address change.
				 *
				 * Note that sumd is overwritten here: on entry
				 * it is the address delta from step 1, after
				 * CALC_SUMD it is the delta between the old
				 * and the new th_sum.
				 */

				sum1 = ntohs(tcp->th_sum);
				fix_datacksum(&tcp->th_sum, sumd);
				sum2 = ntohs(tcp->th_sum);
				sumd2 = sumd << 1;
				CALC_SUMD(sum1, sum2, sumd);
				sumd2 += sumd;

				/*
				 * Also make data checksum adjustment to
				 * compensate for any port change.
				 */

				if (psum1 != psum2) {
					CALC_SUMD(psum1, psum2, psumd);
					fix_datacksum(&tcp->th_sum, psumd);
				}

			} else if ((oip->ip_p == IPPROTO_UDP) &&
				   (dlen >= 8) && (udp->uh_sum != 0)) {

				/*
				 * The UDP checksum is present and set.
				 *
				 * Adjust data checksum and icmp checksum to
				 * compensate for any IP address change.
				 *
				 * As in the TCP case, sumd is overwritten
				 * with the old-to-new uh_sum delta.
				 */

				sum1 = ntohs(udp->uh_sum);
				fix_datacksum(&udp->uh_sum, sumd);
				sum2 = ntohs(udp->uh_sum);
				sumd2 = sumd << 1;
				CALC_SUMD(sum1, sum2, sumd);
				sumd2 += sumd;

				/*
				 * Also make data checksum adjustment to
				 * compensate for any port change.
				 */

				if (psum1 != psum2) {
					CALC_SUMD(psum1, psum2, psumd);
					fix_datacksum(&udp->uh_sum, psumd);
				}

			} else {

				/*
				 * Data checksum was not present.
				 *
				 * Compensate for any port change.  Note the
				 * argument order (new, old) is the reverse
				 * of the one used in the branches above.
				 */

				CALC_SUMD(psum2, psum1, psumd);
				sumd2 += psumd;
			}
			break;

		case IPPROTO_ICMP :

			/* The embedded payload is itself an ICMP header. */
			orgicmp = (icmphdr_t *)udp;

			if ((nat->nat_dir == NAT_OUTBOUND) &&
			    (orgicmp->icmp_id != nat->nat_inport) &&
			    (dlen >= 8)) {

				/*
				 * Fix ICMP checksum (of the offending ICMP
				 * query packet) to compensate the change
				 * in the ICMP id of the offending ICMP
				 * packet.
				 *
				 * Since you modify orgicmp->icmp_id with
				 * a delta (say x) and you compensate that
				 * in orgicmp->icmp_cksum with a delta
				 * minus x, you don't have to adjust the
				 * overall icmp->icmp_cksum
				 */

				sum1 = ntohs(orgicmp->icmp_id);
				sum2 = ntohs(nat->nat_inport);
				CALC_SUMD(sum1, sum2, sumd);
				orgicmp->icmp_id = nat->nat_inport;
				fix_datacksum(&orgicmp->icmp_cksum, sumd);

			} /* nat_dir can't be NAT_INBOUND for icmp queries */

			break;

		default :

			/* Other protocols: only the step 1 fix-up applies. */
			break;

	} /* switch (oip->ip_p) */

	/*
	 * Step 3
	 * Make the adjustments to icmp checksum.
	 *
	 * sumd2 is folded to 16 bits (twice, to absorb a carry out of the
	 * first fold) before it is applied.
	 */

	if (sumd2 != 0) {
		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
		fix_incksum(&icmp->icmp_cksum, sumd2);
	}
	return nat;
}
2949 
2950 
2951 /*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
2954  */
2955 
2956 /* ------------------------------------------------------------------------ */
2957 /* Function:    nat_inlookup                                                */
2958 /* Returns:     nat_t* - NULL == no match,                                  */
2959 /*                       else pointer to matching NAT entry                 */
2960 /* Parameters:  fin(I)    - pointer to packet information                   */
2961 /*              flags(I)  - NAT flags for this packet                       */
2962 /*              p(I)      - protocol for this packet                        */
2963 /*              src(I)    - source IP address                               */
2964 /*              mapdst(I) - destination IP address                          */
2965 /*                                                                          */
2966 /* Lookup a nat entry based on the mapped destination ip address/port and   */
2967 /* real source address/port.  We use this lookup when receiving a packet,   */
2968 /* we're looking for a table entry, based on the destination address.       */
2969 /*                                                                          */
2970 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
2971 /*                                                                          */
2972 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
2973 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
2974 /*                                                                          */
2975 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
2976 /*            the packet is of said protocol                                */
2977 /* ------------------------------------------------------------------------ */
2978 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
2979 fr_info_t *fin;
2980 u_int flags, p;
2981 struct in_addr src , mapdst;
2982 {
2983 	u_short sport, dport;
2984 	ipnat_t *ipn;
2985 	u_int sflags;
2986 	nat_t *nat;
2987 	int nflags;
2988 	u_32_t dst;
2989 	void *ifp;
2990 	u_int hv;
2991 	ipf_stack_t *ifs = fin->fin_ifs;
2992 
2993 	if (fin != NULL)
2994 		ifp = fin->fin_ifp;
2995 	else
2996 		ifp = NULL;
2997 	sport = 0;
2998 	dport = 0;
2999 	dst = mapdst.s_addr;
3000 	sflags = flags & NAT_TCPUDPICMP;
3001 
3002 	switch (p)
3003 	{
3004 	case IPPROTO_TCP :
3005 	case IPPROTO_UDP :
3006 		sport = htons(fin->fin_data[0]);
3007 		dport = htons(fin->fin_data[1]);
3008 		break;
3009 	case IPPROTO_ICMP :
3010 		if (flags & IPN_ICMPERR)
3011 			sport = fin->fin_data[1];
3012 		else
3013 			dport = fin->fin_data[1];
3014 		break;
3015 	default :
3016 		break;
3017 	}
3018 
3019 
3020 	if ((flags & SI_WILDP) != 0)
3021 		goto find_in_wild_ports;
3022 
3023 	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3024 	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3025 	nat = ifs->ifs_nat_table[1][hv];
3026 	for (; nat; nat = nat->nat_hnext[1]) {
3027 		if (nat->nat_ifps[0] != NULL) {
3028 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3029 				continue;
3030 		} else if (ifp != NULL)
3031 			nat->nat_ifps[0] = ifp;
3032 
3033 		nflags = nat->nat_flags;
3034 
3035 		if (nat->nat_oip.s_addr == src.s_addr &&
3036 		    nat->nat_outip.s_addr == dst &&
3037 		    (((p == 0) &&
3038 		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3039 		     || (p == nat->nat_p))) {
3040 			switch (p)
3041 			{
3042 #if 0
3043 			case IPPROTO_GRE :
3044 				if (nat->nat_call[1] != fin->fin_data[0])
3045 					continue;
3046 				break;
3047 #endif
3048 			case IPPROTO_ICMP :
3049 				if ((flags & IPN_ICMPERR) != 0) {
3050 					if (nat->nat_outport != sport)
3051 						continue;
3052 				} else {
3053 					if (nat->nat_outport != dport)
3054 						continue;
3055 				}
3056 				break;
3057 			case IPPROTO_TCP :
3058 			case IPPROTO_UDP :
3059 				if (nat->nat_oport != sport)
3060 					continue;
3061 				if (nat->nat_outport != dport)
3062 					continue;
3063 				break;
3064 			default :
3065 				break;
3066 			}
3067 
3068 			ipn = nat->nat_ptr;
3069 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3070 				if (appr_match(fin, nat) != 0)
3071 					continue;
3072 			return nat;
3073 		}
3074 	}
3075 
3076 	/*
3077 	 * So if we didn't find it but there are wildcard members in the hash
3078 	 * table, go back and look for them.  We do this search and update here
3079 	 * because it is modifying the NAT table and we want to do this only
3080 	 * for the first packet that matches.  The exception, of course, is
3081 	 * for "dummy" (FI_IGNORE) lookups.
3082 	 */
3083 find_in_wild_ports:
3084 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3085 		return NULL;
3086 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3087 		return NULL;
3088 
3089 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3090 
3091 	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3092 	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3093 
3094 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3095 
3096 	nat = ifs->ifs_nat_table[1][hv];
3097 	for (; nat; nat = nat->nat_hnext[1]) {
3098 		if (nat->nat_ifps[0] != NULL) {
3099 			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3100 				continue;
3101 		} else if (ifp != NULL)
3102 			nat->nat_ifps[0] = ifp;
3103 
3104 		if (nat->nat_p != fin->fin_p)
3105 			continue;
3106 		if (nat->nat_oip.s_addr != src.s_addr ||
3107 		    nat->nat_outip.s_addr != dst)
3108 			continue;
3109 
3110 		nflags = nat->nat_flags;
3111 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3112 			continue;
3113 
3114 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3115 			       NAT_INBOUND) == 1) {
3116 			if ((fin->fin_flx & FI_IGNORE) != 0)
3117 				break;
3118 			if ((nflags & SI_CLONE) != 0) {
3119 				nat = fr_natclone(fin, nat);
3120 				if (nat == NULL)
3121 					break;
3122 			} else {
3123 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3124 				ifs->ifs_nat_stats.ns_wilds--;
3125 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3126 			}
3127 			nat->nat_oport = sport;
3128 			nat->nat_outport = dport;
3129 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3130 			nat_tabmove(nat, ifs);
3131 			break;
3132 		}
3133 	}
3134 
3135 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3136 
3137 	return nat;
3138 }
3139 
3140 
3141 /* ------------------------------------------------------------------------ */
3142 /* Function:    nat_tabmove                                                 */
3143 /* Returns:     Nil                                                         */
3144 /* Parameters:  nat(I) - pointer to NAT structure                           */
3145 /* Write Lock:  ipf_nat                                                     */
3146 /*                                                                          */
3147 /* This function is only called for TCP/UDP NAT table entries where the     */
3148 /* original was placed in the table without hashing on the ports and we now */
3149 /* want to include hashing on port numbers.                                 */
3150 /* ------------------------------------------------------------------------ */
3151 static void nat_tabmove(nat, ifs)
3152 nat_t *nat;
3153 ipf_stack_t *ifs;
3154 {
3155 	nat_t **natp;
3156 	u_int hv;
3157 
3158 	if (nat->nat_flags & SI_CLONE)
3159 		return;
3160 
3161 	/*
3162 	 * Remove the NAT entry from the old location
3163 	 */
3164 	if (nat->nat_hnext[0])
3165 		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3166 	*nat->nat_phnext[0] = nat->nat_hnext[0];
3167 	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3168 
3169 	if (nat->nat_hnext[1])
3170 		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3171 	*nat->nat_phnext[1] = nat->nat_hnext[1];
3172 	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3173 
3174 	/*
3175 	 * Add into the NAT table in the new position
3176 	 */
3177 	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3178 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3179 			 ifs->ifs_ipf_nattable_sz);
3180 	nat->nat_hv[0] = hv;
3181 	natp = &ifs->ifs_nat_table[0][hv];
3182 	if (*natp)
3183 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3184 	nat->nat_phnext[0] = natp;
3185 	nat->nat_hnext[0] = *natp;
3186 	*natp = nat;
3187 	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3188 
3189 	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3190 	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3191 			 ifs->ifs_ipf_nattable_sz);
3192 	nat->nat_hv[1] = hv;
3193 	natp = &ifs->ifs_nat_table[1][hv];
3194 	if (*natp)
3195 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3196 	nat->nat_phnext[1] = natp;
3197 	nat->nat_hnext[1] = *natp;
3198 	*natp = nat;
3199 	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3200 }
3201 
3202 
3203 /* ------------------------------------------------------------------------ */
3204 /* Function:    nat_outlookup                                               */
3205 /* Returns:     nat_t* - NULL == no match,                                  */
3206 /*                       else pointer to matching NAT entry                 */
3207 /* Parameters:  fin(I)   - pointer to packet information                    */
3208 /*              flags(I) - NAT flags for this packet                        */
3209 /*              p(I)     - protocol for this packet                         */
3210 /*              src(I)   - source IP address                                */
3211 /*              dst(I)   - destination IP address                           */
3212 /*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3213 /*                                                                          */
3214 /* Lookup a nat entry based on the source 'real' ip address/port and        */
3215 /* destination address/port.  We use this lookup when sending a packet out, */
3216 /* we're looking for a table entry, based on the source address.            */
3217 /*                                                                          */
3218 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3219 /*                                                                          */
3220 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3221 /*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3222 /*                                                                          */
3223 /* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3224 /*            the packet is of said protocol                                */
3225 /* ------------------------------------------------------------------------ */
3226 nat_t *nat_outlookup(fin, flags, p, src, dst)
3227 fr_info_t *fin;
3228 u_int flags, p;
3229 struct in_addr src , dst;
3230 {
3231 	u_short sport, dport;
3232 	u_int sflags;
3233 	ipnat_t *ipn;
3234 	u_32_t srcip;
3235 	nat_t *nat;
3236 	int nflags;
3237 	void *ifp;
3238 	u_int hv;
3239 	ipf_stack_t *ifs = fin->fin_ifs;
3240 
3241 	ifp = fin->fin_ifp;
3242 
3243 	srcip = src.s_addr;
3244 	sflags = flags & IPN_TCPUDPICMP;
3245 	sport = 0;
3246 	dport = 0;
3247 
3248 	switch (p)
3249 	{
3250 	case IPPROTO_TCP :
3251 	case IPPROTO_UDP :
3252 		sport = htons(fin->fin_data[0]);
3253 		dport = htons(fin->fin_data[1]);
3254 		break;
3255 	case IPPROTO_ICMP :
3256 		if (flags & IPN_ICMPERR)
3257 			sport = fin->fin_data[1];
3258 		else
3259 			dport = fin->fin_data[1];
3260 		break;
3261 	default :
3262 		break;
3263 	}
3264 
3265 	if ((flags & SI_WILDP) != 0)
3266 		goto find_out_wild_ports;
3267 
3268 	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3269 	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3270 	nat = ifs->ifs_nat_table[0][hv];
3271 	for (; nat; nat = nat->nat_hnext[0]) {
3272 		if (nat->nat_ifps[1] != NULL) {
3273 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3274 				continue;
3275 		} else if (ifp != NULL)
3276 			nat->nat_ifps[1] = ifp;
3277 
3278 		nflags = nat->nat_flags;
3279 
3280 		if (nat->nat_inip.s_addr == srcip &&
3281 		    nat->nat_oip.s_addr == dst.s_addr &&
3282 		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3283 		     || (p == nat->nat_p))) {
3284 			switch (p)
3285 			{
3286 #if 0
3287 			case IPPROTO_GRE :
3288 				if (nat->nat_call[1] != fin->fin_data[0])
3289 					continue;
3290 				break;
3291 #endif
3292 			case IPPROTO_TCP :
3293 			case IPPROTO_UDP :
3294 				if (nat->nat_oport != dport)
3295 					continue;
3296 				if (nat->nat_inport != sport)
3297 					continue;
3298 				break;
3299 			default :
3300 				break;
3301 			}
3302 
3303 			ipn = nat->nat_ptr;
3304 			if ((ipn != NULL) && (nat->nat_aps != NULL))
3305 				if (appr_match(fin, nat) != 0)
3306 					continue;
3307 			return nat;
3308 		}
3309 	}
3310 
3311 	/*
3312 	 * So if we didn't find it but there are wildcard members in the hash
3313 	 * table, go back and look for them.  We do this search and update here
3314 	 * because it is modifying the NAT table and we want to do this only
3315 	 * for the first packet that matches.  The exception, of course, is
3316 	 * for "dummy" (FI_IGNORE) lookups.
3317 	 */
3318 find_out_wild_ports:
3319 	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3320 		return NULL;
3321 	if (ifs->ifs_nat_stats.ns_wilds == 0)
3322 		return NULL;
3323 
3324 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3325 
3326 	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3327 	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3328 
3329 	WRITE_ENTER(&ifs->ifs_ipf_nat);
3330 
3331 	nat = ifs->ifs_nat_table[0][hv];
3332 	for (; nat; nat = nat->nat_hnext[0]) {
3333 		if (nat->nat_ifps[1] != NULL) {
3334 			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3335 				continue;
3336 		} else if (ifp != NULL)
3337 			nat->nat_ifps[1] = ifp;
3338 
3339 		if (nat->nat_p != fin->fin_p)
3340 			continue;
3341 		if ((nat->nat_inip.s_addr != srcip) ||
3342 		    (nat->nat_oip.s_addr != dst.s_addr))
3343 			continue;
3344 
3345 		nflags = nat->nat_flags;
3346 		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3347 			continue;
3348 
3349 		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3350 			       NAT_OUTBOUND) == 1) {
3351 			if ((fin->fin_flx & FI_IGNORE) != 0)
3352 				break;
3353 			if ((nflags & SI_CLONE) != 0) {
3354 				nat = fr_natclone(fin, nat);
3355 				if (nat == NULL)
3356 					break;
3357 			} else {
3358 				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3359 				ifs->ifs_nat_stats.ns_wilds--;
3360 				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3361 			}
3362 			nat->nat_inport = sport;
3363 			nat->nat_oport = dport;
3364 			if (nat->nat_outport == 0)
3365 				nat->nat_outport = sport;
3366 			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3367 			nat_tabmove(nat, ifs);
3368 			break;
3369 		}
3370 	}
3371 
3372 	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3373 
3374 	return nat;
3375 }
3376 
3377 
3378 /* ------------------------------------------------------------------------ */
3379 /* Function:    nat_lookupredir                                             */
3380 /* Returns:     nat_t* - NULL == no match,                                  */
3381 /*                       else pointer to matching NAT entry                 */
3382 /* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3383 /*                      entry for.                                          */
3384 /*                                                                          */
3385 /* Lookup the NAT tables to search for a matching redirect                  */
3386 /* ------------------------------------------------------------------------ */
3387 nat_t *nat_lookupredir(np, ifs)
3388 natlookup_t *np;
3389 ipf_stack_t *ifs;
3390 {
3391 	fr_info_t fi;
3392 	nat_t *nat;
3393 
3394 	bzero((char *)&fi, sizeof(fi));
3395 	if (np->nl_flags & IPN_IN) {
3396 		fi.fin_data[0] = ntohs(np->nl_realport);
3397 		fi.fin_data[1] = ntohs(np->nl_outport);
3398 	} else {
3399 		fi.fin_data[0] = ntohs(np->nl_inport);
3400 		fi.fin_data[1] = ntohs(np->nl_outport);
3401 	}
3402 	if (np->nl_flags & IPN_TCP)
3403 		fi.fin_p = IPPROTO_TCP;
3404 	else if (np->nl_flags & IPN_UDP)
3405 		fi.fin_p = IPPROTO_UDP;
3406 	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3407 		fi.fin_p = IPPROTO_ICMP;
3408 
3409 	fi.fin_ifs = ifs;
3410 	/*
3411 	 * We can do two sorts of lookups:
3412 	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3413 	 * - default: we have the `in' and `out' address, look for `real'.
3414 	 */
3415 	if (np->nl_flags & IPN_IN) {
3416 		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3417 					np->nl_realip, np->nl_outip))) {
3418 			np->nl_inip = nat->nat_inip;
3419 			np->nl_inport = nat->nat_inport;
3420 		}
3421 	} else {
3422 		/*
3423 		 * If nl_inip is non null, this is a lookup based on the real
3424 		 * ip address. Else, we use the fake.
3425 		 */
3426 		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3427 					 np->nl_inip, np->nl_outip))) {
3428 
3429 			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3430 				fr_info_t fin;
3431 				bzero((char *)&fin, sizeof(fin));
3432 				fin.fin_p = nat->nat_p;
3433 				fin.fin_data[0] = ntohs(nat->nat_outport);
3434 				fin.fin_data[1] = ntohs(nat->nat_oport);
3435 				fin.fin_ifs = ifs;
3436 				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3437 						 nat->nat_outip,
3438 						 nat->nat_oip) != NULL) {
3439 					np->nl_flags &= ~IPN_FINDFORWARD;
3440 				}
3441 			}
3442 
3443 			np->nl_realip = nat->nat_outip;
3444 			np->nl_realport = nat->nat_outport;
3445 		}
3446  	}
3447 
3448 	return nat;
3449 }
3450 
3451 
3452 /* ------------------------------------------------------------------------ */
3453 /* Function:    nat_match                                                   */
3454 /* Returns:     int - 0 == no match, 1 == match                             */
3455 /* Parameters:  fin(I)   - pointer to packet information                    */
3456 /*              np(I)    - pointer to NAT rule                              */
3457 /*                                                                          */
3458 /* Pull the matching of a packet against a NAT rule out of that complex     */
3459 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3460 /* ------------------------------------------------------------------------ */
3461 static int nat_match(fin, np)
3462 fr_info_t *fin;
3463 ipnat_t *np;
3464 {
3465 	frtuc_t *ft;
3466 
3467 	if (fin->fin_v != 4)
3468 		return 0;
3469 
3470 	if (np->in_p && fin->fin_p != np->in_p)
3471 		return 0;
3472 
3473 	if (fin->fin_out) {
3474 		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3475 			return 0;
3476 		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3477 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3478 			return 0;
3479 		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3480 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3481 			return 0;
3482 	} else {
3483 		if (!(np->in_redir & NAT_REDIRECT))
3484 			return 0;
3485 		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3486 		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3487 			return 0;
3488 		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3489 		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3490 			return 0;
3491 	}
3492 
3493 	ft = &np->in_tuc;
3494 	if (!(fin->fin_flx & FI_TCPUDP) ||
3495 	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3496 		if (ft->ftu_scmp || ft->ftu_dcmp)
3497 			return 0;
3498 		return 1;
3499 	}
3500 
3501 	return fr_tcpudpchk(fin, ft);
3502 }
3503 
3504 
3505 /* ------------------------------------------------------------------------ */
3506 /* Function:    nat_update                                                  */
3507 /* Returns:     Nil                                                         */
3508 /* Parameters:  nat(I)    - pointer to NAT structure                        */
3509 /*              np(I)     - pointer to NAT rule                             */
3510 /*                                                                          */
3511 /* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3512 /* called with fin_rev updated - i.e. after calling nat_proto().            */
3513 /* ------------------------------------------------------------------------ */
3514 void nat_update(fin, nat, np)
3515 fr_info_t *fin;
3516 nat_t *nat;
3517 ipnat_t *np;
3518 {
3519 	ipftq_t *ifq, *ifq2;
3520 	ipftqent_t *tqe;
3521 	ipf_stack_t *ifs = fin->fin_ifs;
3522 
3523 	MUTEX_ENTER(&nat->nat_lock);
3524 	tqe = &nat->nat_tqe;
3525 	ifq = tqe->tqe_ifq;
3526 
3527 	/*
3528 	 * We allow over-riding of NAT timeouts from NAT rules, even for
3529 	 * TCP, however, if it is TCP and there is no rule timeout set,
3530 	 * then do not update the timeout here.
3531 	 */
3532 	if (np != NULL)
3533 		ifq2 = np->in_tqehead[fin->fin_rev];
3534 	else
3535 		ifq2 = NULL;
3536 
3537 	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3538 		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3539 	} else {
3540 		if (ifq2 == NULL) {
3541 			if (nat->nat_p == IPPROTO_UDP)
3542 				ifq2 = &ifs->ifs_nat_udptq;
3543 			else if (nat->nat_p == IPPROTO_ICMP)
3544 				ifq2 = &ifs->ifs_nat_icmptq;
3545 			else
3546 				ifq2 = &ifs->ifs_nat_iptq;
3547 		}
3548 
3549 		fr_movequeue(tqe, ifq, ifq2, ifs);
3550 	}
3551 	MUTEX_EXIT(&nat->nat_lock);
3552 }
3553 
3554 
3555 /* ------------------------------------------------------------------------ */
3556 /* Function:    fr_checknatout                                              */
3557 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3558 /*                     0 == no packet translation occurred,                 */
3559 /*                     1 == packet was successfully translated.             */
3560 /* Parameters:  fin(I)   - pointer to packet information                    */
3561 /*              passp(I) - pointer to filtering result flags                */
3562 /*                                                                          */
/* Check to see if an outgoing packet should be changed.  ICMP packets are  */
/* first checked to see if they match an existing entry (if an error),      */
/* otherwise a search of the current NAT table is made.  If neither results */
/* in a match then a search for a matching NAT rule is made.  Create a new  */
/* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
/* packet header(s) as required.                                            */
3569 /* ------------------------------------------------------------------------ */
3570 int fr_checknatout(fin, passp)
3571 fr_info_t *fin;
3572 u_32_t *passp;
3573 {
3574 	struct ifnet *ifp, *sifp;
3575 	icmphdr_t *icmp = NULL;
3576 	tcphdr_t *tcp = NULL;
3577 	int rval, natfailed;
3578 	ipnat_t *np = NULL;
3579 	u_int nflags = 0;
3580 	u_32_t ipa, iph;
3581 	int natadd = 1;
3582 	frentry_t *fr;
3583 	nat_t *nat;
3584 	ipf_stack_t *ifs = fin->fin_ifs;
3585 
3586 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3587 		return 0;
3588 
3589 	natfailed = 0;
3590 	fr = fin->fin_fr;
3591 	sifp = fin->fin_ifp;
3592 	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3593 	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3594 	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3595 		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3596 	ifp = fin->fin_ifp;
3597 
3598 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3599 		switch (fin->fin_p)
3600 		{
3601 		case IPPROTO_TCP :
3602 			nflags = IPN_TCP;
3603 			break;
3604 		case IPPROTO_UDP :
3605 			nflags = IPN_UDP;
3606 			break;
3607 		case IPPROTO_ICMP :
3608 			icmp = fin->fin_dp;
3609 
3610 			/*
3611 			 * This is an incoming packet, so the destination is
3612 			 * the icmp_id and the source port equals 0
3613 			 */
3614 			if (nat_icmpquerytype4(icmp->icmp_type))
3615 				nflags = IPN_ICMPQUERY;
3616 			break;
3617 		default :
3618 			break;
3619 		}
3620 
3621 		if ((nflags & IPN_TCPUDP))
3622 			tcp = fin->fin_dp;
3623 	}
3624 
3625 	ipa = fin->fin_saddr;
3626 
3627 	READ_ENTER(&ifs->ifs_ipf_nat);
3628 
3629 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3630 	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3631 		/*EMPTY*/;
3632 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3633 		natadd = 0;
3634 	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3635 				      fin->fin_src, fin->fin_dst))) {
3636 		nflags = nat->nat_flags;
3637 	} else {
3638 		u_32_t hv, msk, nmsk;
3639 
3640 		/*
3641 		 * If there is no current entry in the nat table for this IP#,
3642 		 * create one for it (if there is a matching rule).
3643 		 */
3644 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3645 		msk = 0xffffffff;
3646 		nmsk = ifs->ifs_nat_masks;
3647 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3648 maskloop:
3649 		iph = ipa & htonl(msk);
3650 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3651 		for (np = ifs->ifs_nat_rules[hv]; np; np = np->in_mnext)
3652 		{
3653 			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3654 				continue;
3655 			if (np->in_v != fin->fin_v)
3656 				continue;
3657 			if (np->in_p && (np->in_p != fin->fin_p))
3658 				continue;
3659 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3660 				continue;
3661 			if (np->in_flags & IPN_FILTER) {
3662 				if (!nat_match(fin, np))
3663 					continue;
3664 			} else if ((ipa & np->in_inmsk) != np->in_inip)
3665 				continue;
3666 
3667 			if ((fr != NULL) &&
3668 			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3669 				continue;
3670 
3671 			if (*np->in_plabel != '\0') {
3672 				if (((np->in_flags & IPN_FILTER) == 0) &&
3673 				    (np->in_dport != tcp->th_dport))
3674 					continue;
3675 				if (appr_ok(fin, tcp, np) == 0)
3676 					continue;
3677 			}
3678 
3679 			if ((nat = nat_new(fin, np, NULL, nflags,
3680 					   NAT_OUTBOUND))) {
3681 				np->in_hits++;
3682 				break;
3683 			} else
3684 				natfailed = -1;
3685 		}
3686 		if ((np == NULL) && (nmsk != 0)) {
3687 			while (nmsk) {
3688 				msk <<= 1;
3689 				if (nmsk & 0x80000000)
3690 					break;
3691 				nmsk <<= 1;
3692 			}
3693 			if (nmsk != 0) {
3694 				nmsk <<= 1;
3695 				goto maskloop;
3696 			}
3697 		}
3698 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3699 	}
3700 
3701 	if (nat != NULL) {
3702 		rval = fr_natout(fin, nat, natadd, nflags);
3703 		if (rval == 1) {
3704 			MUTEX_ENTER(&nat->nat_lock);
3705 			nat->nat_ref++;
3706 			MUTEX_EXIT(&nat->nat_lock);
3707 			nat->nat_touched = ifs->ifs_fr_ticks;
3708 			fin->fin_nat = nat;
3709 		}
3710 	} else
3711 		rval = natfailed;
3712 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3713 
3714 	if (rval == -1) {
3715 		if (passp != NULL)
3716 			*passp = FR_BLOCK;
3717 		fin->fin_flx |= FI_BADNAT;
3718 	}
3719 	fin->fin_ifp = sifp;
3720 	return rval;
3721 }
3722 
/* ------------------------------------------------------------------------ */
/* Function:    fr_natout                                                   */
/* Returns:     int - -1 == packet failed NAT checks so block it,           */
/*                     1 == packet was successfully translated.             */
/* Parameters:  fin(I)    - pointer to packet information                   */
/*              nat(I)    - pointer to NAT structure                        */
/*              natadd(I) - flag indicating if it is safe to add frag cache */
/*              nflags(I) - NAT flags set for this packet                   */
/*                                                                          */
/* Translate a packet coming "out" on an interface.                         */
/* Bumps the session's outbound counters, rewrites the IP source address    */
/* (plus source port or ICMP id when the transport header is present),      */
/* applies incremental checksum fix-ups and runs any proxy attached to the  */
/* rule.  A zero result from appr_check is reported to the caller as 1.     */
/* ------------------------------------------------------------------------ */
int fr_natout(fin, nat, natadd, nflags)
fr_info_t *fin;
nat_t *nat;
int natadd;
u_32_t nflags;
{
	icmphdr_t *icmp;
	u_short *csump;
	u_32_t sumd;
	tcphdr_t *tcp;
	ipnat_t *np;
	int i;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * NOTE(review): net_data_p only exists on Solaris kernel builds yet
	 * it is passed to the NET_IS_HCK_* macros unconditionally below;
	 * presumably those macros discard their arguments elsewhere - confirm.
	 */
#if SOLARIS && defined(_KERNEL)
	net_data_t net_data_p;
	if (fin->fin_v == 4)
		net_data_p = ifs->ifs_ipf_ipv4;
	else
		net_data_p = ifs->ifs_ipf_ipv6;
#endif

	tcp = NULL;
	icmp = NULL;
	csump = NULL;
	np = nat->nat_ptr;

	/*
	 * For fragments, register this session with the NAT fragment cache,
	 * but only when the caller has said it is safe to do so.
	 */
	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
		(void) fr_nat_newfrag(fin, 0, nat);

	/* Per-session statistics; this function accounts into slot 1. */
	MUTEX_ENTER(&nat->nat_lock);
	nat->nat_bytes[1] += fin->fin_plen;
	nat->nat_pkts[1]++;
	MUTEX_EXIT(&nat->nat_lock);

	/*
	 * Fix up checksums, not by recalculating them, but
	 * simply computing adjustments.
	 * This is only done for STREAMS based IP implementations where the
	 * checksum has already been calculated by IP.  In all other cases,
	 * IPFilter is called before the checksum needs calculating so there
	 * is no call to modify whatever is in the header now.
	 */
	ASSERT(fin->fin_m != NULL);
	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
		if (nflags == IPN_ICMPERR) {
			u_32_t s1, s2;

			/*
			 * ICMP error: the adjustment is derived here from
			 * the packet's current source address and the
			 * session's nat_outip rather than taken from the
			 * precomputed nat_ipsumd.
			 */
			s1 = LONG_SUM(ntohl(fin->fin_saddr));
			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
			CALC_SUMD(s1, s2, sumd);

			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
		}
#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
    defined(linux) || defined(BRIDGE_IPF)
		else {
			/*
			 * Strictly speaking, this isn't necessary on BSD
			 * kernels because they do checksum calculation after
			 * this code has run BUT if ipfilter is being used
			 * to do NAT as a bridge, that code doesn't exist.
			 */
			if (nat->nat_dir == NAT_OUTBOUND)
				fix_outcksum(&fin->fin_ip->ip_sum,
					    nat->nat_ipsumd);
			else
				fix_incksum(&fin->fin_ip->ip_sum,
				 	   nat->nat_ipsumd);
		}
#endif
	}

	/*
	 * The transport header is only touched when the packet is not
	 * flagged short and sits at fragment offset 0.
	 */
	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
			tcp = fin->fin_dp;

			/*
			 * nat_outport is stored into the header as-is;
			 * fin_data[0] receives the ntohs() converted copy.
			 */
			tcp->th_sport = nat->nat_outport;
			fin->fin_data[0] = ntohs(nat->nat_outport);
		}

		/* For ICMP queries nat_outport carries the translated id. */
		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
			icmp = fin->fin_dp;
			icmp->icmp_id = nat->nat_outport;
		}

		/* NULL when there is no transport checksum to adjust. */
		csump = nat_proto(fin, nat, nflags);
	}

	fin->fin_ip->ip_src = nat->nat_outip;

	nat_update(fin, nat, np);

	/*
	 * The above comments do not hold for layer 4 (or higher) checksums...
	 */
	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
		/*
		 * nat_sumd[1] is chosen for TCP/UDP when NET_IS_HCK_L4_PART
		 * is true, nat_sumd[0] otherwise.
		 */
		if (nflags & IPN_TCPUDP &&
	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
			sumd = nat->nat_sumd[1];
		else
			sumd = nat->nat_sumd[0];

		if (nat->nat_dir == NAT_OUTBOUND)
			fix_outcksum(csump, sumd);
		else
			fix_incksum(csump, sumd);
	}
#ifdef	IPFILTER_SYNC
	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
#endif
	/* ------------------------------------------------------------- */
	/* A few quick notes:						 */
	/*	Following are test conditions prior to calling the 	 */
	/*	appr_check routine.					 */
	/*								 */
	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
	/*	with a redirect rule, we attempt to match the packet's	 */
	/*	source port against in_dport, otherwise	we'd compare the */
	/*	packet's destination.			 		 */
	/* ------------------------------------------------------------- */
	/*
	 * The proxy is only consulted when the session still has a rule
	 * (nat_ptr) with a proxy attached.  A 0 from appr_check is mapped
	 * to 1; any other value is returned unchanged.
	 */
	if ((np != NULL) && (np->in_apr != NULL)) {
		i = appr_check(fin, nat);
		if (i == 0)
			i = 1;
	} else
		i = 1;
	/* Counted and flagged as NATed whatever appr_check returned. */
	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[1]);
	fin->fin_flx |= FI_NATED;
	return i;
}
3865 
3866 
3867 /* ------------------------------------------------------------------------ */
3868 /* Function:    fr_checknatin                                               */
3869 /* Returns:     int - -1 == packet failed NAT checks so block it,           */
3870 /*                     0 == no packet translation occurred,                 */
3871 /*                     1 == packet was successfully translated.             */
3872 /* Parameters:  fin(I)   - pointer to packet information                    */
3873 /*              passp(I) - pointer to filtering result flags                */
3874 /*                                                                          */
3875 /* Check to see if an incoming packet should be changed.  ICMP packets are  */
3876 /* first checked to see if they match an existing entry (if an error),      */
3877 /* otherwise a search of the current NAT table is made.  If neither results */
3878 /* in a match then a search for a matching NAT rule is made.  Create a new  */
3879 /* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3880 /* packet header(s) as required.                                            */
3881 /* ------------------------------------------------------------------------ */
3882 int fr_checknatin(fin, passp)
3883 fr_info_t *fin;
3884 u_32_t *passp;
3885 {
3886 	u_int nflags, natadd;
3887 	int rval, natfailed;
3888 	struct ifnet *ifp;
3889 	struct in_addr in;
3890 	icmphdr_t *icmp;
3891 	tcphdr_t *tcp;
3892 	u_short dport;
3893 	ipnat_t *np;
3894 	nat_t *nat;
3895 	u_32_t iph;
3896 	ipf_stack_t *ifs = fin->fin_ifs;
3897 
3898 	if (ifs->ifs_nat_stats.ns_rules == 0 || ifs->ifs_fr_nat_lock != 0)
3899 		return 0;
3900 
3901 	tcp = NULL;
3902 	icmp = NULL;
3903 	dport = 0;
3904 	natadd = 1;
3905 	nflags = 0;
3906 	natfailed = 0;
3907 	ifp = fin->fin_ifp;
3908 
3909 	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3910 		switch (fin->fin_p)
3911 		{
3912 		case IPPROTO_TCP :
3913 			nflags = IPN_TCP;
3914 			break;
3915 		case IPPROTO_UDP :
3916 			nflags = IPN_UDP;
3917 			break;
3918 		case IPPROTO_ICMP :
3919 			icmp = fin->fin_dp;
3920 
3921 			/*
3922 			 * This is an incoming packet, so the destination is
3923 			 * the icmp_id and the source port equals 0
3924 			 */
3925 			if (nat_icmpquerytype4(icmp->icmp_type)) {
3926 				nflags = IPN_ICMPQUERY;
3927 				dport = icmp->icmp_id;
3928 			} break;
3929 		default :
3930 			break;
3931 		}
3932 
3933 		if ((nflags & IPN_TCPUDP)) {
3934 			tcp = fin->fin_dp;
3935 			dport = tcp->th_dport;
3936 		}
3937 	}
3938 
3939 	in = fin->fin_dst;
3940 
3941 	READ_ENTER(&ifs->ifs_ipf_nat);
3942 
3943 	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3944 	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
3945 		/*EMPTY*/;
3946 	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3947 		natadd = 0;
3948 	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3949 				     fin->fin_src, in))) {
3950 		nflags = nat->nat_flags;
3951 	} else {
3952 		u_32_t hv, msk, rmsk;
3953 
3954 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3955 		rmsk = ifs->ifs_rdr_masks;
3956 		msk = 0xffffffff;
3957 		WRITE_ENTER(&ifs->ifs_ipf_nat);
3958 		/*
3959 		 * If there is no current entry in the nat table for this IP#,
3960 		 * create one for it (if there is a matching rule).
3961 		 */
3962 maskloop:
3963 		iph = in.s_addr & htonl(msk);
3964 		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
3965 		for (np = ifs->ifs_rdr_rules[hv]; np; np = np->in_rnext) {
3966 			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
3967 				continue;
3968 			if (np->in_v != fin->fin_v)
3969 				continue;
3970 			if (np->in_p && (np->in_p != fin->fin_p))
3971 				continue;
3972 			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3973 				continue;
3974 			if (np->in_flags & IPN_FILTER) {
3975 				if (!nat_match(fin, np))
3976 					continue;
3977 			} else {
3978 				if ((in.s_addr & np->in_outmsk) != np->in_outip)
3979 					continue;
3980 				if (np->in_pmin &&
3981 				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
3982 				     (ntohs(dport) < ntohs(np->in_pmin))))
3983 					continue;
3984 			}
3985 
3986 			if (*np->in_plabel != '\0') {
3987 				if (!appr_ok(fin, tcp, np)) {
3988 					continue;
3989 				}
3990 			}
3991 
3992 			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
3993 			if (nat != NULL) {
3994 				np->in_hits++;
3995 				break;
3996 			} else
3997 				natfailed = -1;
3998 		}
3999 
4000 		if ((np == NULL) && (rmsk != 0)) {
4001 			while (rmsk) {
4002 				msk <<= 1;
4003 				if (rmsk & 0x80000000)
4004 					break;
4005 				rmsk <<= 1;
4006 			}
4007 			if (rmsk != 0) {
4008 				rmsk <<= 1;
4009 				goto maskloop;
4010 			}
4011 		}
4012 		MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4013 	}
4014 	if (nat != NULL) {
4015 		rval = fr_natin(fin, nat, natadd, nflags);
4016 		if (rval == 1) {
4017 			MUTEX_ENTER(&nat->nat_lock);
4018 			nat->nat_ref++;
4019 			MUTEX_EXIT(&nat->nat_lock);
4020 			nat->nat_touched = ifs->ifs_fr_ticks;
4021 			fin->fin_nat = nat;
4022 			fin->fin_state = nat->nat_state;
4023 		}
4024 	} else
4025 		rval = natfailed;
4026 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4027 
4028 	if (rval == -1) {
4029 		if (passp != NULL)
4030 			*passp = FR_BLOCK;
4031 		fin->fin_flx |= FI_BADNAT;
4032 	}
4033 	return rval;
4034 }
4035 
4036 
/* ------------------------------------------------------------------------ */
/* Function:    fr_natin                                                    */
/* Returns:     int - -1 == packet failed NAT checks so block it,           */
/*                     1 == packet was successfully translated.             */
/* Parameters:  fin(I)    - pointer to packet information                   */
/*              nat(I)    - pointer to NAT structure                        */
/*              natadd(I) - flag indicating if it is safe to add frag cache */
/*              nflags(I) - NAT flags set for this packet                   */
/* Locks Held:  ipf_nat (READ)                                              */
/*                                                                          */
/* Translate a packet coming "in" on an interface.                          */
/* Runs any proxy attached to the rule first (a -1 from it aborts), then    */
/* rewrites the destination address (plus port or ICMP id) and applies      */
/* incremental checksum fix-ups.                                            */
/* ------------------------------------------------------------------------ */
int fr_natin(fin, nat, natadd, nflags)
fr_info_t *fin;
nat_t *nat;
int natadd;
u_32_t nflags;
{
	icmphdr_t *icmp;
	u_short *csump;
	tcphdr_t *tcp;
	ipnat_t *np;
	int i;
	ipf_stack_t *ifs = fin->fin_ifs;

	/* net_data_p is only needed (and declared) on Solaris kernels. */
#if SOLARIS && defined(_KERNEL)
	net_data_t net_data_p;
	if (fin->fin_v == 4)
		net_data_p = ifs->ifs_ipf_ipv4;
	else
		net_data_p = ifs->ifs_ipf_ipv6;
#endif

	tcp = NULL;
	csump = NULL;
	np = nat->nat_ptr;
	fin->fin_fr = nat->nat_fr;

	/*
	 * For fragments, register this session with the NAT fragment cache,
	 * but only when the caller has said it is safe to do so.
	 */
	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
		(void) fr_nat_newfrag(fin, 0, nat);

	if (np != NULL) {

	/* ------------------------------------------------------------- */
	/* A few quick notes:						 */
	/*	Following are test conditions prior to calling the 	 */
	/*	appr_check routine.					 */
	/*								 */
	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
	/*	with a map rule, we attempt to match the packet's	 */
	/*	source port against in_dport, otherwise	we'd compare the */
	/*	packet's destination.			 		 */
	/* ------------------------------------------------------------- */
		/*
		 * The proxy runs before any header is rewritten; a -1 from
		 * it leaves the packet and the counters untouched.
		 */
		if (np->in_apr != NULL) {
			i = appr_check(fin, nat);
			if (i == -1) {
				return -1;
			}
		}
	}

#ifdef	IPFILTER_SYNC
	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
#endif

	/* Per-session statistics; this function accounts into slot 0. */
	MUTEX_ENTER(&nat->nat_lock);
	nat->nat_bytes[0] += fin->fin_plen;
	nat->nat_pkts[0]++;
	MUTEX_EXIT(&nat->nat_lock);

	/* Rewrite the destination in the header and in the cached copy. */
	fin->fin_ip->ip_dst = nat->nat_inip;
	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
	if (nflags & IPN_TCPUDP)
		tcp = fin->fin_dp;

	/*
	 * Fix up checksums, not by recalculating them, but
	 * simply computing adjustments.
	 * Why only do this for some platforms on inbound packets ?
	 * Because for those that it is done, IP processing is yet to happen
	 * and so the IPv4 header checksum has not yet been evaluated.
	 * Perhaps it should always be done for the benefit of things like
	 * fast forwarding (so that it doesn't need to be recomputed) but with
	 * header checksum offloading, perhaps it is a moot point.
	 */
#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
     defined(__osf__) || defined(linux)
	if (nat->nat_dir == NAT_OUTBOUND)
		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
	else
		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
#endif

	/*
	 * The transport header is only touched when the packet is not
	 * flagged short and sits at fragment offset 0.
	 */
	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
		/* tcp was set above under the same IPN_TCPUDP test. */
		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
			tcp->th_dport = nat->nat_inport;
			fin->fin_data[1] = ntohs(nat->nat_inport);
		}


		/* For ICMP queries nat_inport carries the original id. */
		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
			icmp = fin->fin_dp;

			icmp->icmp_id = nat->nat_inport;
		}

		/* NULL when there is no transport checksum to adjust. */
		csump = nat_proto(fin, nat, nflags);
	}

	nat_update(fin, nat, np);

	/*
	 * In case they are being forwarded, inbound packets always need to have
	 * their checksum adjusted even if hardware checksum validation said OK.
	 */
	if (csump != NULL) {
		if (nat->nat_dir == NAT_OUTBOUND)
			fix_incksum(csump, nat->nat_sumd[0]);
		else
			fix_outcksum(csump, nat->nat_sumd[0]);
	}

#if SOLARIS && defined(_KERNEL)
	if (nflags & IPN_TCPUDP &&
	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
		/*
		 * Need to adjust the partial checksum result stored in
		 * db_cksum16, which will be used for validation in IP.
		 * See IP_CKSUM_RECV().
		 * Adjustment data should be the inverse of the IP address
		 * changes, because db_cksum16 is supposed to be the complement
		 * of the pseudo header.
		 */
		csump = &fin->fin_m->b_datap->db_cksum16;
		if (nat->nat_dir == NAT_OUTBOUND)
			fix_outcksum(csump, nat->nat_sumd[1]);
		else
			fix_incksum(csump, nat->nat_sumd[1]);
	}
#endif

	ATOMIC_INCL(ifs->ifs_nat_stats.ns_mapped[0]);
	fin->fin_flx |= FI_NATED;
	/* Expose the rule's tag on the packet when the rule has one. */
	if (np != NULL && np->in_tag.ipt_num[0] != 0)
		fin->fin_nattag = &np->in_tag;
	return 1;
}
4184 
4185 
4186 /* ------------------------------------------------------------------------ */
4187 /* Function:    nat_proto                                                   */
4188 /* Returns:     u_short* - pointer to transport header checksum to update,  */
4189 /*                         NULL if the transport protocol is not recognised */
4190 /*                         as needing a checksum update.                    */
4191 /* Parameters:  fin(I)    - pointer to packet information                   */
4192 /*              nat(I)    - pointer to NAT structure                        */
4193 /*              nflags(I) - NAT flags set for this packet                   */
4194 /*                                                                          */
4195 /* Return the pointer to the checksum field for each protocol so understood.*/
4196 /* If support for making other changes to a protocol header is required,    */
4197 /* that is not strictly 'address' translation, such as clamping the MSS in  */
4198 /* TCP down to a specific value, then do it from here.                      */
4199 /* ------------------------------------------------------------------------ */
4200 u_short *nat_proto(fin, nat, nflags)
4201 fr_info_t *fin;
4202 nat_t *nat;
4203 u_int nflags;
4204 {
4205 	icmphdr_t *icmp;
4206 	u_short *csump;
4207 	tcphdr_t *tcp;
4208 	udphdr_t *udp;
4209 
4210 	csump = NULL;
4211 	if (fin->fin_out == 0) {
4212 		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4213 	} else {
4214 		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4215 	}
4216 
4217 	switch (fin->fin_p)
4218 	{
4219 	case IPPROTO_TCP :
4220 		tcp = fin->fin_dp;
4221 
4222 		csump = &tcp->th_sum;
4223 
4224 		/*
4225 		 * Do a MSS CLAMPING on a SYN packet,
4226 		 * only deal IPv4 for now.
4227 		 */
4228 		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4229 			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4230 
4231 		break;
4232 
4233 	case IPPROTO_UDP :
4234 		udp = fin->fin_dp;
4235 
4236 		if (udp->uh_sum)
4237 			csump = &udp->uh_sum;
4238 		break;
4239 
4240 	case IPPROTO_ICMP :
4241 		icmp = fin->fin_dp;
4242 
4243 		if ((nflags & IPN_ICMPQUERY) != 0) {
4244 			if (icmp->icmp_cksum != 0)
4245 				csump = &icmp->icmp_cksum;
4246 		}
4247 		break;
4248 	}
4249 	return csump;
4250 }
4251 
4252 
4253 /* ------------------------------------------------------------------------ */
4254 /* Function:    fr_natunload                                                */
4255 /* Returns:     Nil                                                         */
4256 /* Parameters:  Nil                                                         */
4257 /*                                                                          */
4258 /* Free all memory used by NAT structures allocated at runtime.             */
4259 /* ------------------------------------------------------------------------ */
4260 void fr_natunload(ifs)
4261 ipf_stack_t *ifs;
4262 {
4263 	ipftq_t *ifq, *ifqnext;
4264 
4265 	(void) nat_clearlist(ifs);
4266 	(void) nat_flushtable(ifs);
4267 
4268 	/*
4269 	 * Proxy timeout queues are not cleaned here because although they
4270 	 * exist on the NAT list, appr_unload is called after fr_natunload
4271 	 * and the proxies actually are responsible for them being created.
4272 	 * Should the proxy timeouts have their own list?  There's no real
4273 	 * justification as this is the only complication.
4274 	 */
4275 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4276 		ifqnext = ifq->ifq_next;
4277 		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4278 		    (fr_deletetimeoutqueue(ifq) == 0))
4279 			fr_freetimeoutqueue(ifq, ifs);
4280 	}
4281 
4282 	if (ifs->ifs_nat_table[0] != NULL) {
4283 		KFREES(ifs->ifs_nat_table[0],
4284 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4285 		ifs->ifs_nat_table[0] = NULL;
4286 	}
4287 	if (ifs->ifs_nat_table[1] != NULL) {
4288 		KFREES(ifs->ifs_nat_table[1],
4289 		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4290 		ifs->ifs_nat_table[1] = NULL;
4291 	}
4292 	if (ifs->ifs_nat_rules != NULL) {
4293 		KFREES(ifs->ifs_nat_rules,
4294 		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4295 		ifs->ifs_nat_rules = NULL;
4296 	}
4297 	if (ifs->ifs_rdr_rules != NULL) {
4298 		KFREES(ifs->ifs_rdr_rules,
4299 		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4300 		ifs->ifs_rdr_rules = NULL;
4301 	}
4302 	if (ifs->ifs_maptable != NULL) {
4303 		KFREES(ifs->ifs_maptable,
4304 		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4305 		ifs->ifs_maptable = NULL;
4306 	}
4307 	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4308 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4309 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4310 		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4311 	}
4312 	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4313 		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4314 		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4315 		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4316 	}
4317 
4318 	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4319 		ifs->ifs_fr_nat_maxbucket = 0;
4320 
4321 	if (ifs->ifs_fr_nat_init == 1) {
4322 		ifs->ifs_fr_nat_init = 0;
4323 		fr_sttab_destroy(ifs->ifs_nat_tqb);
4324 
4325 		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4326 		RW_DESTROY(&ifs->ifs_ipf_nat);
4327 
4328 		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4329 		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4330 
4331 		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4332 		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4333 		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4334 	}
4335 }
4336 
4337 
4338 /* ------------------------------------------------------------------------ */
4339 /* Function:    fr_natexpire                                                */
4340 /* Returns:     Nil                                                         */
4341 /* Parameters:  Nil                                                         */
4342 /*                                                                          */
4343 /* Check all of the timeout queues for entries at the top which need to be  */
4344 /* expired.                                                                 */
4345 /* ------------------------------------------------------------------------ */
4346 void fr_natexpire(ifs)
4347 ipf_stack_t *ifs;
4348 {
4349 	ipftq_t *ifq, *ifqnext;
4350 	ipftqent_t *tqe, *tqn;
4351 	int i;
4352 	SPL_INT(s);
4353 
4354 	SPL_NET(s);
4355 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4356 	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4357 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4358 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4359 				break;
4360 			tqn = tqe->tqe_next;
4361 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4362 		}
4363 	}
4364 
4365 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4366 		ifqnext = ifq->ifq_next;
4367 
4368 		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4369 			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4370 				break;
4371 			tqn = tqe->tqe_next;
4372 			nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4373 		}
4374 	}
4375 
4376 	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4377 		ifqnext = ifq->ifq_next;
4378 
4379 		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4380 		    (ifq->ifq_ref == 0)) {
4381 			fr_freetimeoutqueue(ifq, ifs);
4382 		}
4383 	}
4384 
4385 	if (ifs->ifs_nat_doflush != 0) {
4386 		(void) nat_extraflush(2, ifs);
4387 		ifs->ifs_nat_doflush = 0;
4388 	}
4389 
4390 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4391 	SPL_X(s);
4392 }
4393 
4394 
4395 /* ------------------------------------------------------------------------ */
4396 /* Function:    fr_nataddrsync                                              */
4397 /* Returns:     Nil                                                         */
4398 /* Parameters:  ifp(I) -  pointer to network interface                      */
4399 /*              addr(I) - pointer to new network address                    */
4400 /*                                                                          */
4401 /* Walk through all of the currently active NAT sessions, looking for those */
4402 /* which need to have their translated address updated (where the interface */
4403 /* matches the one passed in) and change it, recalculating the checksum sum */
4404 /* difference too.                                                          */
4405 /* ------------------------------------------------------------------------ */
4406 void fr_nataddrsync(ifp, addr, ifs)
4407 void *ifp;
4408 struct in_addr *addr;
4409 ipf_stack_t *ifs;
4410 {
4411 	u_32_t sum1, sum2, sumd;
4412 	nat_t *nat;
4413 	ipnat_t *np;
4414 	SPL_INT(s);
4415 
4416 	if (ifs->ifs_fr_running <= 0)
4417 		return;
4418 
4419 	SPL_NET(s);
4420 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4421 
4422 	if (ifs->ifs_fr_running <= 0) {
4423 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4424 		return;
4425 	}
4426 
4427 	/*
4428 	 * Change IP addresses for NAT sessions for any protocol except TCP
4429 	 * since it will break the TCP connection anyway.  The only rules
4430 	 * which will get changed are those which are "map ... -> 0/32",
4431 	 * where the rule specifies the address is taken from the interface.
4432 	 */
4433 	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4434 		if (addr != NULL) {
4435 			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4436 			    ((nat->nat_flags & IPN_TCP) != 0))
4437 				continue;
4438 			if (((np = nat->nat_ptr) == NULL) ||
4439 			    (np->in_nip || (np->in_outmsk != 0xffffffff)))
4440 				continue;
4441 
4442 			/*
4443 			 * Change the map-to address to be the same as the
4444 			 * new one.
4445 			 */
4446 			sum1 = nat->nat_outip.s_addr;
4447 			nat->nat_outip = *addr;
4448 			sum2 = nat->nat_outip.s_addr;
4449 
4450 		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4451 		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
4452 		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
4453 			struct in_addr in;
4454 
4455 			/*
4456 			 * Change the map-to address to be the same as the
4457 			 * new one.
4458 			 */
4459 			sum1 = nat->nat_outip.s_addr;
4460 			if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4461 				       &in, NULL, ifs) != -1)
4462 				nat->nat_outip = in;
4463 			sum2 = nat->nat_outip.s_addr;
4464 		} else {
4465 			continue;
4466 		}
4467 
4468 		if (sum1 == sum2)
4469 			continue;
4470 		/*
4471 		 * Readjust the checksum adjustment to take into
4472 		 * account the new IP#.
4473 		 */
4474 		CALC_SUMD(sum1, sum2, sumd);
4475 		/* XXX - dont change for TCP when solaris does
4476 		 * hardware checksumming.
4477 		 */
4478 		sumd += nat->nat_sumd[0];
4479 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4480 		nat->nat_sumd[1] = nat->nat_sumd[0];
4481 	}
4482 
4483 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4484 	SPL_X(s);
4485 }
4486 
4487 
4488 /* ------------------------------------------------------------------------ */
4489 /* Function:    fr_natifpsync                                               */
4490 /* Returns:     Nil                                                         */
4491 /* Parameters:  action(I) - how we are syncing                              */
4492 /*              ifp(I)    - pointer to network interface                    */
4493 /*              name(I)   - name of interface to sync to                    */
4494 /*                                                                          */
4495 /* This function is used to resync the mapping of interface names and their */
4496 /* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4497 /* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4498 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4499 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4500 /* there is no longer any interface associated with it.                     */
4501 /* ------------------------------------------------------------------------ */
4502 void fr_natifpsync(action, ifp, name, ifs)
4503 int action;
4504 void *ifp;
4505 char *name;
4506 ipf_stack_t *ifs;
4507 {
4508 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4509 	int s;
4510 #endif
4511 	nat_t *nat;
4512 	ipnat_t *n;
4513 
4514 	if (ifs->ifs_fr_running <= 0)
4515 		return;
4516 
4517 	SPL_NET(s);
4518 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4519 
4520 	if (ifs->ifs_fr_running <= 0) {
4521 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4522 		return;
4523 	}
4524 
4525 	switch (action)
4526 	{
4527 	case IPFSYNC_RESYNC :
4528 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4529 			if ((ifp == nat->nat_ifps[0]) ||
4530 			    (nat->nat_ifps[0] == (void *)-1)) {
4531 				nat->nat_ifps[0] =
4532 				    fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
4533 			}
4534 
4535 			if ((ifp == nat->nat_ifps[1]) ||
4536 			    (nat->nat_ifps[1] == (void *)-1)) {
4537 				nat->nat_ifps[1] =
4538 				    fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
4539 			}
4540 		}
4541 
4542 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4543 			if (n->in_ifps[0] == ifp ||
4544 			    n->in_ifps[0] == (void *)-1) {
4545 				n->in_ifps[0] =
4546 				    fr_resolvenic(n->in_ifnames[0], 4, ifs);
4547 			}
4548 			if (n->in_ifps[1] == ifp ||
4549 			    n->in_ifps[1] == (void *)-1) {
4550 				n->in_ifps[1] =
4551 				    fr_resolvenic(n->in_ifnames[1], 4, ifs);
4552 			}
4553 		}
4554 		break;
4555 	case IPFSYNC_NEWIFP :
4556 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4557 			if (!strncmp(name, nat->nat_ifnames[0],
4558 				     sizeof(nat->nat_ifnames[0])))
4559 				nat->nat_ifps[0] = ifp;
4560 			if (!strncmp(name, nat->nat_ifnames[1],
4561 				     sizeof(nat->nat_ifnames[1])))
4562 				nat->nat_ifps[1] = ifp;
4563 		}
4564 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4565 			if (!strncmp(name, n->in_ifnames[0],
4566 				     sizeof(n->in_ifnames[0])))
4567 				n->in_ifps[0] = ifp;
4568 			if (!strncmp(name, n->in_ifnames[1],
4569 				     sizeof(n->in_ifnames[1])))
4570 				n->in_ifps[1] = ifp;
4571 		}
4572 		break;
4573 	case IPFSYNC_OLDIFP :
4574 		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4575 			if (ifp == nat->nat_ifps[0])
4576 				nat->nat_ifps[0] = (void *)-1;
4577 			if (ifp == nat->nat_ifps[1])
4578 				nat->nat_ifps[1] = (void *)-1;
4579 		}
4580 		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4581 			if (n->in_ifps[0] == ifp)
4582 				n->in_ifps[0] = (void *)-1;
4583 			if (n->in_ifps[1] == ifp)
4584 				n->in_ifps[1] = (void *)-1;
4585 		}
4586 		break;
4587 	}
4588 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4589 	SPL_X(s);
4590 }
4591 
4592 
4593 /* ------------------------------------------------------------------------ */
4594 /* Function:    nat_icmpquerytype4                                          */
4595 /* Returns:     int - 1 == success, 0 == failure                            */
4596 /* Parameters:  icmptype(I) - ICMP type number                              */
4597 /*                                                                          */
4598 /* Tests to see if the ICMP type number passed is a query/response type or  */
4599 /* not.                                                                     */
4600 /* ------------------------------------------------------------------------ */
4601 static INLINE int nat_icmpquerytype4(icmptype)
4602 int icmptype;
4603 {
4604 
4605 	/*
4606 	 * For the ICMP query NAT code, it is essential that both the query
4607 	 * and the reply match on the NAT rule. Because the NAT structure
4608 	 * does not keep track of the icmptype, and a single NAT structure
4609 	 * is used for all icmp types with the same src, dest and id, we
4610 	 * simply define the replies as queries as well. The funny thing is,
4611 	 * altough it seems silly to call a reply a query, this is exactly
4612 	 * as it is defined in the IPv4 specification
4613 	 */
4614 
4615 	switch (icmptype)
4616 	{
4617 
4618 	case ICMP_ECHOREPLY:
4619 	case ICMP_ECHO:
4620 	/* route aedvertisement/solliciation is currently unsupported: */
4621 	/* it would require rewriting the ICMP data section            */
4622 	case ICMP_TSTAMP:
4623 	case ICMP_TSTAMPREPLY:
4624 	case ICMP_IREQ:
4625 	case ICMP_IREQREPLY:
4626 	case ICMP_MASKREQ:
4627 	case ICMP_MASKREPLY:
4628 		return 1;
4629 	default:
4630 		return 0;
4631 	}
4632 }
4633 
4634 
4635 /* ------------------------------------------------------------------------ */
4636 /* Function:    nat_log                                                     */
4637 /* Returns:     Nil                                                         */
4638 /* Parameters:  nat(I)  - pointer to NAT structure                          */
4639 /*              type(I) - type of log entry to create                       */
4640 /*                                                                          */
4641 /* Creates a NAT log entry.                                                 */
4642 /* ------------------------------------------------------------------------ */
4643 void nat_log(nat, type, ifs)
4644 struct nat *nat;
4645 u_int type;
4646 ipf_stack_t *ifs;
4647 {
4648 #ifdef	IPFILTER_LOG
4649 # ifndef LARGE_NAT
4650 	struct ipnat *np;
4651 	int rulen;
4652 # endif
4653 	struct natlog natl;
4654 	void *items[1];
4655 	size_t sizes[1];
4656 	int types[1];
4657 
4658 	natl.nl_inip = nat->nat_inip;
4659 	natl.nl_outip = nat->nat_outip;
4660 	natl.nl_origip = nat->nat_oip;
4661 	natl.nl_bytes[0] = nat->nat_bytes[0];
4662 	natl.nl_bytes[1] = nat->nat_bytes[1];
4663 	natl.nl_pkts[0] = nat->nat_pkts[0];
4664 	natl.nl_pkts[1] = nat->nat_pkts[1];
4665 	natl.nl_origport = nat->nat_oport;
4666 	natl.nl_inport = nat->nat_inport;
4667 	natl.nl_outport = nat->nat_outport;
4668 	natl.nl_p = nat->nat_p;
4669 	natl.nl_type = type;
4670 	natl.nl_rule = -1;
4671 # ifndef LARGE_NAT
4672 	if (nat->nat_ptr != NULL) {
4673 		for (rulen = 0, np = ifs->ifs_nat_list; np;
4674 		     np = np->in_next, rulen++)
4675 			if (np == nat->nat_ptr) {
4676 				natl.nl_rule = rulen;
4677 				break;
4678 			}
4679 	}
4680 # endif
4681 	items[0] = &natl;
4682 	sizes[0] = sizeof(natl);
4683 	types[0] = 0;
4684 
4685 	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
4686 #endif
4687 }
4688 
4689 
#if defined(__OpenBSD__)
/* ------------------------------------------------------------------------ */
/* Function:    nat_ifdetach                                                */
/* Returns:     Nil                                                         */
/* Parameters:  ifp(I) - pointer to network interface                       */
/*              ifs(I) - ipf stack instance                                 */
/*                                                                          */
/* OpenBSD-only compatibility hook: forwards the interface to frsync() so   */
/* that interface references held within IPFilter get updated.              */
/* ------------------------------------------------------------------------ */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	/* All of the work is done by the generic resync routine. */
	frsync(ifp, ifs);
}
#endif
4707 
4708 
4709 /* ------------------------------------------------------------------------ */
4710 /* Function:    fr_ipnatderef                                               */
4711 /* Returns:     Nil                                                         */
4712 /* Parameters:  isp(I) - pointer to pointer to NAT rule                     */
4713 /* Write Locks: ipf_nat                                                     */
4714 /*                                                                          */
4715 /* ------------------------------------------------------------------------ */
4716 void fr_ipnatderef(inp, ifs)
4717 ipnat_t **inp;
4718 ipf_stack_t *ifs;
4719 {
4720 	ipnat_t *in;
4721 
4722 	in = *inp;
4723 	*inp = NULL;
4724 	in->in_space++;
4725 	in->in_use--;
4726 	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
4727 		if (in->in_apr)
4728 			appr_free(in->in_apr);
4729 		KFREE(in);
4730 		ifs->ifs_nat_stats.ns_rules--;
4731 #ifdef notdef
4732 #if SOLARIS
4733 		if (ifs->ifs_nat_stats.ns_rules == 0)
4734 			ifs->ifs_pfil_delayed_copy = 1;
4735 #endif
4736 #endif
4737 	}
4738 }
4739 
4740 
4741 /* ------------------------------------------------------------------------ */
4742 /* Function:    fr_natderef                                                 */
4743 /* Returns:     Nil                                                         */
4744 /* Parameters:  isp(I) - pointer to pointer to NAT table entry              */
4745 /*                                                                          */
4746 /* Decrement the reference counter for this NAT table entry and free it if  */
4747 /* there are no more things using it.                                       */
4748 /*                                                                          */
4749 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
4750 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
4751 /* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
4752 /* because nat_delete() will do that and send nat_ref to -1.                */
4753 /*                                                                          */
4754 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
4755 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
4756 /* ------------------------------------------------------------------------ */
4757 void fr_natderef(natp, ifs)
4758 nat_t **natp;
4759 ipf_stack_t *ifs;
4760 {
4761 	nat_t *nat;
4762 
4763 	nat = *natp;
4764 	*natp = NULL;
4765 
4766 	MUTEX_ENTER(&nat->nat_lock);
4767 	if (nat->nat_ref > 1) {
4768 		nat->nat_ref--;
4769 		MUTEX_EXIT(&nat->nat_lock);
4770 		return;
4771 	}
4772 	MUTEX_EXIT(&nat->nat_lock);
4773 
4774 	WRITE_ENTER(&ifs->ifs_ipf_nat);
4775 	nat_delete(nat, NL_EXPIRE, ifs);
4776 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4777 }
4778 
4779 
4780 /* ------------------------------------------------------------------------ */
4781 /* Function:    fr_natclone                                                 */
4782 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
4783 /*                           else pointer to new state structure            */
4784 /* Parameters:  fin(I) - pointer to packet information                      */
4785 /*              is(I)  - pointer to master state structure                  */
4786 /* Write Lock:  ipf_nat                                                     */
4787 /*                                                                          */
4788 /* Create a "duplcate" state table entry from the master.                   */
4789 /* ------------------------------------------------------------------------ */
4790 static nat_t *fr_natclone(fin, nat)
4791 fr_info_t *fin;
4792 nat_t *nat;
4793 {
4794 	frentry_t *fr;
4795 	nat_t *clone;
4796 	ipnat_t *np;
4797 	ipf_stack_t *ifs = fin->fin_ifs;
4798 
4799 	KMALLOC(clone, nat_t *);
4800 	if (clone == NULL)
4801 		return NULL;
4802 	bcopy((char *)nat, (char *)clone, sizeof(*clone));
4803 
4804 	MUTEX_NUKE(&clone->nat_lock);
4805 
4806 	clone->nat_aps = NULL;
4807 	/*
4808 	 * Initialize all these so that nat_delete() doesn't cause a crash.
4809 	 */
4810 	clone->nat_tqe.tqe_pnext = NULL;
4811 	clone->nat_tqe.tqe_next = NULL;
4812 	clone->nat_tqe.tqe_ifq = NULL;
4813 	clone->nat_tqe.tqe_parent = clone;
4814 
4815 	clone->nat_flags &= ~SI_CLONE;
4816 	clone->nat_flags |= SI_CLONED;
4817 
4818 	if (clone->nat_hm)
4819 		clone->nat_hm->hm_ref++;
4820 
4821 	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
4822 		KFREE(clone);
4823 		return NULL;
4824 	}
4825 	np = clone->nat_ptr;
4826 	if (np != NULL) {
4827 		if (ifs->ifs_nat_logging)
4828 			nat_log(clone, (u_int)np->in_redir, ifs);
4829 		np->in_use++;
4830 	}
4831 	fr = clone->nat_fr;
4832 	if (fr != NULL) {
4833 		MUTEX_ENTER(&fr->fr_lock);
4834 		fr->fr_ref++;
4835 		MUTEX_EXIT(&fr->fr_lock);
4836 	}
4837 
4838 	/*
4839 	 * Because the clone is created outside the normal loop of things and
4840 	 * TCP has special needs in terms of state, initialise the timeout
4841 	 * state of the new NAT from here.
4842 	 */
4843 	if (clone->nat_p == IPPROTO_TCP) {
4844 		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
4845 				  clone->nat_flags);
4846 	}
4847 #ifdef	IPFILTER_SYNC
4848 	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
4849 #endif
4850 	if (ifs->ifs_nat_logging)
4851 		nat_log(clone, NL_CLONE, ifs);
4852 	return clone;
4853 }
4854 
4855 
4856 /* ------------------------------------------------------------------------ */
4857 /* Function:   nat_wildok                                                   */
4858 /* Returns:    int - 1 == packet's ports match wildcards                    */
4859 /*                   0 == packet's ports don't match wildcards              */
4860 /* Parameters: nat(I)   - NAT entry                                         */
4861 /*             sport(I) - source port                                       */
4862 /*             dport(I) - destination port                                  */
4863 /*             flags(I) - wildcard flags                                    */
4864 /*             dir(I)   - packet direction                                  */
4865 /*                                                                          */
4866 /* Use NAT entry and packet direction to determine which combination of     */
4867 /* wildcard flags should be used.                                           */
4868 /* ------------------------------------------------------------------------ */
4869 static INLINE int nat_wildok(nat, sport, dport, flags, dir)
4870 nat_t *nat;
4871 int sport;
4872 int dport;
4873 int flags;
4874 int dir;
4875 {
4876 	/*
4877 	 * When called by       dir is set to
4878 	 * nat_inlookup         NAT_INBOUND (0)
4879 	 * nat_outlookup        NAT_OUTBOUND (1)
4880 	 *
4881 	 * We simply combine the packet's direction in dir with the original
4882 	 * "intended" direction of that NAT entry in nat->nat_dir to decide
4883 	 * which combination of wildcard flags to allow.
4884 	 */
4885 
4886 	switch ((dir << 1) | nat->nat_dir)
4887 	{
4888 	case 3: /* outbound packet / outbound entry */
4889 		if (((nat->nat_inport == sport) ||
4890 		    (flags & SI_W_SPORT)) &&
4891 		    ((nat->nat_oport == dport) ||
4892 		    (flags & SI_W_DPORT)))
4893 			return 1;
4894 		break;
4895 	case 2: /* outbound packet / inbound entry */
4896 		if (((nat->nat_outport == sport) ||
4897 		    (flags & SI_W_DPORT)) &&
4898 		    ((nat->nat_oport == dport) ||
4899 		    (flags & SI_W_SPORT)))
4900 			return 1;
4901 		break;
4902 	case 1: /* inbound packet / outbound entry */
4903 		if (((nat->nat_oport == sport) ||
4904 		    (flags & SI_W_DPORT)) &&
4905 		    ((nat->nat_outport == dport) ||
4906 		    (flags & SI_W_SPORT)))
4907 			return 1;
4908 		break;
4909 	case 0: /* inbound packet / inbound entry */
4910 		if (((nat->nat_oport == sport) ||
4911 		    (flags & SI_W_SPORT)) &&
4912 		    ((nat->nat_outport == dport) ||
4913 		    (flags & SI_W_DPORT)))
4914 			return 1;
4915 		break;
4916 	default:
4917 		break;
4918 	}
4919 
4920 	return(0);
4921 }
4922 
4923 
4924 /* ------------------------------------------------------------------------ */
4925 /* Function:    nat_mssclamp                                                */
4926 /* Returns:     Nil                                                         */
4927 /* Parameters:  tcp(I)    - pointer to TCP header                           */
4928 /*              maxmss(I) - value to clamp the TCP MSS to                   */
4929 /*              csump(I)  - pointer to TCP checksum                         */
4930 /*                                                                          */
4931 /* Check for MSS option and clamp it if necessary.  If found and changed,   */
4932 /* then the TCP header checksum will be updated to reflect the change in    */
4933 /* the MSS.                                                                 */
4934 /* ------------------------------------------------------------------------ */
4935 static void nat_mssclamp(tcp, maxmss, csump)
4936 tcphdr_t *tcp;
4937 u_32_t maxmss;
4938 u_short *csump;
4939 {
4940 	u_char *cp, *ep, opt;
4941 	int hlen, advance;
4942 	u_32_t mss, sumd;
4943 
4944 	hlen = TCP_OFF(tcp) << 2;
4945 	if (hlen > sizeof(*tcp)) {
4946 		cp = (u_char *)tcp + sizeof(*tcp);
4947 		ep = (u_char *)tcp + hlen;
4948 
4949 		while (cp < ep) {
4950 			opt = cp[0];
4951 			if (opt == TCPOPT_EOL)
4952 				break;
4953 			else if (opt == TCPOPT_NOP) {
4954 				cp++;
4955 				continue;
4956 			}
4957 
4958 			if (cp + 1 >= ep)
4959 				break;
4960 			advance = cp[1];
4961 			if ((cp + advance > ep) || (advance <= 0))
4962 				break;
4963 			switch (opt)
4964 			{
4965 			case TCPOPT_MAXSEG:
4966 				if (advance != 4)
4967 					break;
4968 				mss = cp[2] * 256 + cp[3];
4969 				if (mss > maxmss) {
4970 					cp[2] = maxmss / 256;
4971 					cp[3] = maxmss & 0xff;
4972 					CALC_SUMD(mss, maxmss, sumd);
4973 					fix_outcksum(csump, sumd);
4974 				}
4975 				break;
4976 			default:
4977 				/* ignore unknown options */
4978 				break;
4979 			}
4980 
4981 			cp += advance;
4982 		}
4983 	}
4984 }
4985 
4986 
4987 /* ------------------------------------------------------------------------ */
4988 /* Function:    fr_setnatqueue                                              */
4989 /* Returns:     Nil                                                         */
4990 /* Parameters:  nat(I)- pointer to NAT structure                            */
4991 /*              rev(I) - forward(0) or reverse(1) direction                 */
4992 /* Locks:       ipf_nat (read or write)                                     */
4993 /*                                                                          */
4994 /* Put the NAT entry on its default queue entry, using rev as a helped in   */
4995 /* determining which queue it should be placed on.                          */
4996 /* ------------------------------------------------------------------------ */
4997 void fr_setnatqueue(nat, rev, ifs)
4998 nat_t *nat;
4999 int rev;
5000 ipf_stack_t *ifs;
5001 {
5002 	ipftq_t *oifq, *nifq;
5003 
5004 	if (nat->nat_ptr != NULL)
5005 		nifq = nat->nat_ptr->in_tqehead[rev];
5006 	else
5007 		nifq = NULL;
5008 
5009 	if (nifq == NULL) {
5010 		switch (nat->nat_p)
5011 		{
5012 		case IPPROTO_UDP :
5013 			nifq = &ifs->ifs_nat_udptq;
5014 			break;
5015 		case IPPROTO_ICMP :
5016 			nifq = &ifs->ifs_nat_icmptq;
5017 			break;
5018 		case IPPROTO_TCP :
5019 			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5020 			break;
5021 		default :
5022 			nifq = &ifs->ifs_nat_iptq;
5023 			break;
5024 		}
5025 	}
5026 
5027 	oifq = nat->nat_tqe.tqe_ifq;
5028 	/*
5029 	 * If it's currently on a timeout queue, move it from one queue to
5030 	 * another, else put it on the end of the newly determined queue.
5031 	 */
5032 	if (oifq != NULL)
5033 		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5034 	else
5035 		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5036 	return;
5037 }
5038 
/* ------------------------------------------------------------------------ */
/* Function:    nat_getnext                                                 */
/* Returns:     int - 0 == ok, EINVAL == bad item count or iterator type,   */
/*                    EFAULT == copy out to igi_data failed                 */
/* Parameters:  t(I)   - pointer to ipftoken structure                      */
/*              itp(I) - pointer to ipfgeniter_t structure                  */
/*              ifs - ipf stack instance                                    */
/*                                                                          */
/* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
/* and copy it out to the storage space pointed to by itp.  The next item   */
/* in the list to look at is put back in the ipftoken structure.  When the  */
/* end of a list is reached an all-zero structure is copied out instead and */
/* the token's position is reset to NULL.                                   */
/* ------------------------------------------------------------------------ */
static int nat_getnext(t, itp, ifs)
ipftoken_t *t;
ipfgeniter_t *itp;
ipf_stack_t *ifs;
{
	/*
	 * One set of variables per list type; only the set matching
	 * itp->igi_type is assigned and used on any given call.  The
	 * zero* structures are the end-of-list markers copied out when a
	 * list runs dry.
	 */
	hostmap_t *hm, *nexthm = NULL, zerohm;
	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
	nat_t *nat, *nextnat = NULL, zeronat;
	int error = 0, count;
	char *dst;

	if (itp->igi_nitems == 0)
		return EINVAL;

	READ_ENTER(&ifs->ifs_ipf_nat);

	/*
	 * Work out where to resume.  The token holds the element handed
	 * out by the previous call; NULL means start at the list head.
	 */
	switch (itp->igi_type)
	{
	case IPFGENITER_HOSTMAP :
		hm = t->ipt_data;
		if (hm == NULL) {
			nexthm = ifs->ifs_ipf_hm_maplist;
		} else {
			nexthm = hm->hm_next;
		}
		break;

	case IPFGENITER_IPNAT :
		ipn = t->ipt_data;
		if (ipn == NULL) {
			nextipnat = ifs->ifs_nat_list;
		} else {
			nextipnat = ipn->in_next;
		}
		break;

	case IPFGENITER_NAT :
		nat = t->ipt_data;
		if (nat == NULL) {
			nextnat = ifs->ifs_nat_instances;
		} else {
			nextnat = nat->nat_next;
		}
		break;
	default :
		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
		return EINVAL;
	}

	/*
	 * Each pass copies out one element.  ipf_nat is held as reader on
	 * entry to every pass and has been released by the time the pass
	 * ends.
	 */
	dst = itp->igi_data;
	for (count = itp->igi_nitems; count > 0; count--) {
		/*
		 * Step 1 (under the lock): take a reference on the element
		 * about to be returned and record it in the token.  At the
		 * end of the list substitute the zeroed structure, clear the
		 * token and set count to 1 so that this pass is the last.
		 */
		switch (itp->igi_type)
		{
		case IPFGENITER_HOSTMAP :
			if (nexthm != NULL) {
				ATOMIC_INC32(nexthm->hm_ref);
				t->ipt_data = nexthm;
			} else {
				bzero(&zerohm, sizeof(zerohm));
				nexthm = &zerohm;
				count = 1;
				t->ipt_data = NULL;
			}
			break;
		case IPFGENITER_IPNAT :
			if (nextipnat != NULL) {
				ATOMIC_INC32(nextipnat->in_use);
				t->ipt_data = nextipnat;
			} else {
				bzero(&zeroipn, sizeof(zeroipn));
				nextipnat = &zeroipn;
				count = 1;
				t->ipt_data = NULL;
			}
			break;
		case IPFGENITER_NAT :
			if (nextnat != NULL) {
				MUTEX_ENTER(&nextnat->nat_lock);
				nextnat->nat_ref++;
				MUTEX_EXIT(&nextnat->nat_lock);
				t->ipt_data = nextnat;
			} else {
				bzero(&zeronat, sizeof(zeronat));
				nextnat = &zeronat;
				count = 1;
				t->ipt_data = NULL;
			}
			break;
		default :
			break;
		}

		/*
		 * We can safely release our hold on ipf_nat.
		 */
		RWLOCK_EXIT(&ifs->ifs_ipf_nat);

		/*
		 * Step 2 (lock dropped): release the reference on the
		 * previously returned element, copy the current one out to
		 * the caller's buffer and, if that worked, advance to its
		 * successor.
		 *
		 * NOTE(review): the successor pointer (hm_next / in_next /
		 * nat_next) is read here after ipf_nat has been released;
		 * confirm that the reference taken in step 1 is enough to
		 * keep that link valid until the lock is re-taken.
		 */
		switch (itp->igi_type)
		{
		case IPFGENITER_HOSTMAP :
			if (hm != NULL) {
				WRITE_ENTER(&ifs->ifs_ipf_nat);
				fr_hostmapdel(&hm);
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
			if (error != 0) {
				error = EFAULT;
			} else {
				dst += sizeof(*nexthm);
				hm = nexthm;
				nexthm = nexthm->hm_next;
			}
			break;
		case IPFGENITER_IPNAT :
			if (ipn != NULL) {
				WRITE_ENTER(&ifs->ifs_ipf_nat);
				fr_ipnatderef(&ipn, ifs);
				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
			}
			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
			if (error != 0) {
				error = EFAULT;
			} else {
				dst += sizeof(*nextipnat);
				ipn = nextipnat;
				nextipnat = nextipnat->in_next;
			}
			break;
		case IPFGENITER_NAT :
			/* fr_natderef() takes whatever locks it needs itself. */
			if (nat != NULL) {
				fr_natderef(&nat, ifs);
			}
			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
			if (error != 0) {
				error = EFAULT;
			} else {
				dst += sizeof(*nextnat);
				nat = nextnat;
				nextnat = nextnat->nat_next;
			}
			break;
		default :
			break;
		}

		/*
		 * Last requested item, end of list or a failed copy: leave
		 * the loop here, with the lock already released.
		 */
		if ((count == 1) || (error != 0))
			break;

		/* Another pass follows, which expects the read lock held. */
		READ_ENTER(&ifs->ifs_ipf_nat);
	}

	return error;
}
5204 
5205 
5206 /* ------------------------------------------------------------------------ */
5207 /* Function:    nat_iterator                                                */
5208 /* Returns:     int - 0 == ok, else error                                   */
5209 /* Parameters:  token(I) - pointer to ipftoken structure                    */
5210 /*              itp(I) - pointer to ipfgeniter_t structure                  */
5211 /*                                                                          */
5212 /* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5213 /* generic structure to iterate through a list.  There are three different  */
5214 /* linked lists of NAT related information to go through: NAT rules, active */
5215 /* NAT mappings and the NAT fragment cache.                                 */
5216 /* ------------------------------------------------------------------------ */
5217 static int nat_iterator(token, itp, ifs)
5218 ipftoken_t *token;
5219 ipfgeniter_t *itp;
5220 ipf_stack_t *ifs;
5221 {
5222 	int error;
5223 
5224 	if (itp->igi_data == NULL)
5225 		return EFAULT;
5226 
5227 	token->ipt_subtype = itp->igi_type;
5228 
5229 	switch (itp->igi_type)
5230 	{
5231 	case IPFGENITER_HOSTMAP :
5232 	case IPFGENITER_IPNAT :
5233 	case IPFGENITER_NAT :
5234 		error = nat_getnext(token, itp, ifs);
5235 		break;
5236 	case IPFGENITER_NATFRAG :
5237 		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5238 				    &ifs->ifs_ipfr_nattail,
5239 				    &ifs->ifs_ipf_natfrag, ifs);
5240 		break;
5241 	default :
5242 		error = EINVAL;
5243 		break;
5244 	}
5245 
5246 	return error;
5247 }
5248 
5249 
5250 /* -------------------------------------------------------------------- */
5251 /* Function:	nat_earlydrop						*/
5252 /* Returns:	number of dropped/removed entries from the queue	*/
5253 /* Parameters:	ifq - pointer to queue with entries to be processed	*/
5254 /*		maxidle - entry must be idle this long to be dropped	*/
5255 /*		ifs - ipf stack instance				*/
5256 /*									*/
5257 /* Function is invoked from nat_extraflush() only.  Removes entries	*/
5258 /* form specified timeout queue, based on how long they've sat idle,	*/
5259 /* without waiting for it to happen on its own.				*/
5260 /* -------------------------------------------------------------------- */
5261 static int nat_earlydrop(ifq, maxidle, ifs)
5262 ipftq_t *ifq;
5263 int maxidle;
5264 ipf_stack_t *ifs;
5265 {
5266 	ipftqent_t *tqe, *tqn;
5267 	nat_t *nat;
5268 	unsigned int dropped;
5269 	int droptick;
5270 
5271 	if (ifq == NULL)
5272 		return (0);
5273 
5274 	dropped = 0;
5275 
5276 	/*
5277 	 * Determine the tick representing the idle time we're interested
5278 	 * in.  If an entry exists in the queue, and it was touched before
5279 	 * that tick, then it's been idle longer than maxidle ... remove it.
5280 	 */
5281 	droptick = ifs->ifs_fr_ticks - maxidle;
5282 	tqn = ifq->ifq_head;
5283 	while ((tqe = tqn) != NULL && tqe->tqe_touched < droptick) {
5284 		tqn = tqe->tqe_next;
5285 		nat = tqe->tqe_parent;
5286 		nat_delete(nat, ISL_EXPIRE, ifs);
5287 		dropped++;
5288 	}
5289 	return (dropped);
5290 }
5291 
5292 
5293 /* --------------------------------------------------------------------- */
5294 /* Function:	nat_flushclosing					 */
5295 /* Returns:	int - number of NAT entries deleted			 */
5296 /* Parameters:	stateval(I) - State at which to start removing entries	 */
5297 /*		ifs - ipf stack instance				 */
5298 /*									 */
5299 /* Remove nat table entries for TCP connections which are in the process */
5300 /* of closing, and are in (or "beyond") state specified by 'stateval'.	 */
5301 /* --------------------------------------------------------------------- */
5302 static int nat_flushclosing(stateval, ifs)
5303 int stateval;
5304 ipf_stack_t *ifs;
5305 {
5306 	ipftq_t *ifq, *ifqn;
5307 	ipftqent_t *tqe, *tqn;
5308 	nat_t *nat;
5309 	int dropped;
5310 
5311 	dropped = 0;
5312 
5313 	/*
5314 	 * Start by deleting any entries in specific timeout queues.
5315 	 */
5316 	ifqn = &ifs->ifs_nat_tqb[stateval];
5317 	while ((ifq = ifqn) != NULL) {
5318 		ifqn = ifq->ifq_next;
5319 		dropped += nat_earlydrop(ifq, (int)0, ifs);
5320 	}
5321 
5322 	/*
5323 	 * Next, look through user defined queues for closing entries.
5324 	 */
5325 	ifqn = ifs->ifs_nat_utqe;
5326 	while ((ifq = ifqn) != NULL) {
5327 		ifqn = ifq->ifq_next;
5328 		tqn = ifq->ifq_head;
5329 		while ((tqe = tqn) != NULL) {
5330 			tqn = tqe->tqe_next;
5331 			nat = tqe->tqe_parent;
5332 			if (nat->nat_p != IPPROTO_TCP)
5333 				continue;
5334 			if ((nat->nat_tcpstate[0] >= stateval) &&
5335 			    (nat->nat_tcpstate[1] >= stateval)) {
5336 				nat_delete(nat, NL_EXPIRE, ifs);
5337 				dropped++;
5338 			}
5339 		}
5340 	}
5341 	return (dropped);
5342 }
5343 
5344 
5345 /* --------------------------------------------------------------------- */
5346 /* Function:	nat_extraflush						 */
5347 /* Returns:	int - number of NAT entries deleted			 */
5348 /* Parameters:	which(I) - how to flush the active NAT table		 */
5349 /*		ifs - ipf stack instance				 */
5350 /* Write Locks:	ipf_nat							 */
5351 /*									 */
5352 /* Flush nat tables.  Three actions currently defined:			 */
5353 /*									 */
5354 /* which == 0 :	Flush all nat table entries.				 */
5355 /*									 */
5356 /* which == 1 :	Flush entries with TCP connections which have started	 */
5357 /*		to close on both ends.					 */
5358 /*									 */
5359 /* which == 2 :	First, flush entries which are "almost" closed.  If that */
5360 /*		does not take us below specified threshold in the table, */
5361 /*		we want to flush entries with TCP connections which have */
5362 /*		been idle for a long time.  Start with connections idle	 */
5363 /*		over 12 hours,  and then work backwards in half hour	 */
5364 /*		increments to at most 30 minutes idle, and finally work	 */
5365 /*		back in 30 second increments to at most 30 seconds.	 */
5366 /* --------------------------------------------------------------------- */
5367 static int nat_extraflush(which, ifs)
5368 int which;
5369 ipf_stack_t *ifs;
5370 {
5371 	ipftq_t *ifq, *ifqn;
5372 	nat_t *nat, **natp;
5373 	int idletime, removed, idle_idx;
5374 	SPL_INT(s);
5375 
5376 	removed = 0;
5377 
5378 	SPL_NET(s);
5379 	switch (which)
5380 	{
5381 	case 0:
5382 		natp = &ifs->ifs_nat_instances;
5383 		while ((nat = *natp) != NULL) {
5384 			natp = &nat->nat_next;
5385 			nat_delete(nat, ISL_FLUSH, ifs);
5386 			removed++;
5387 		}
5388 		break;
5389 
5390 	case 1:
5391 		removed = nat_flushclosing(IPF_TCPS_CLOSE_WAIT, ifs);
5392 		break;
5393 
5394 	case 2:
5395 		removed = nat_flushclosing(IPF_TCPS_FIN_WAIT_2, ifs);
5396 
5397 		/*
5398 		 * Be sure we haven't done this in the last 10 seconds.
5399 		 */
5400 		if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5401 		    IPF_TTLVAL(10))
5402 			break;
5403 		ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5404 
5405 		/*
5406 		 * Determine initial threshold for minimum idle time based on
5407 		 * how long ipfilter has been running.  Ipfilter needs to have
5408 		 * been up as long as the smallest interval to continue on.
5409 		 *
5410 		 * Minimum idle times stored in idletime_tab and indexed by
5411 		 * idle_idx.  Start at upper end of array and work backwards.
5412 		 *
5413 		 * Once the index is found, set the initial idle time to the
5414 		 * first interval before the current ipfilter run time.
5415 		 */
5416 		if (ifs->ifs_fr_ticks < idletime_tab[0])
5417 			break;  /* switch */
5418 		idle_idx = (sizeof (idletime_tab) / sizeof (int)) - 1;
5419 		if (ifs->ifs_fr_ticks > idletime_tab[idle_idx]) {
5420 			idletime = idletime_tab[idle_idx];
5421 		} else {
5422 			while ((idle_idx > 0) &&
5423 			    (ifs->ifs_fr_ticks < idletime_tab[idle_idx]))
5424 				idle_idx--;
5425 			idletime = (ifs->ifs_fr_ticks /
5426 				    idletime_tab[idle_idx]) *
5427 				    idletime_tab[idle_idx];
5428 		}
5429 
5430 		while ((idle_idx >= 0) &&
5431 		    (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_lvl_lo)) {
5432 			/*
5433 			 * Start with appropriate timeout queue.
5434 			 */
5435 			removed += nat_earlydrop(
5436 					&ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5437 					idletime, ifs);
5438 
5439 			/*
5440 			 * Make sure we haven't already deleted enough
5441 			 * entries before checking the user defined queues.
5442 			 */
5443 			if (NAT_TAB_WATER_LEVEL(ifs) <=
5444 			    ifs->ifs_nat_flush_lvl_lo)
5445 				break;
5446 
5447 			/*
5448 			 * Next, look through the user defined queues.
5449 			 */
5450 			ifqn = ifs->ifs_nat_utqe;
5451 			while ((ifq = ifqn) != NULL) {
5452 				ifqn = ifq->ifq_next;
5453 				removed += nat_earlydrop(ifq, idletime, ifs);
5454 			}
5455 
5456 			/*
5457 			 * Adjust the granularity of idle time.
5458 			 *
5459 			 * If we reach an interval boundary, we need to
5460 			 * either adjust the idle time accordingly or exit
5461 			 * the loop altogether (if this is very last check).
5462 			 */
5463 			idletime -= idletime_tab[idle_idx];
5464 			if (idletime < idletime_tab[idle_idx]) {
5465 				if (idle_idx != 0) {
5466 					idletime = idletime_tab[idle_idx] -
5467 					    idletime_tab[idle_idx - 1];
5468 					idle_idx--;
5469 				} else {
5470 					break;  /* while */
5471 				}
5472 			}
5473 		}
5474 		break;
5475 	default:
5476 		break;
5477 	}
5478 
5479 	SPL_X(s);
5480 	return (removed);
5481 }
5482