xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_nat.c (revision 5338faaa)
1 /*
2  * Copyright (C) 1995-2004 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define        KERNEL	1
14 # define        _KERNEL	1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22     defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 # define _KERNEL
30 # ifdef __OpenBSD__
31 struct file;
32 # endif
33 # include <sys/uio.h>
34 # undef _KERNEL
35 #endif
36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
37 # include <sys/filio.h>
38 # include <sys/fcntl.h>
39 #else
40 # include <sys/ioctl.h>
41 #endif
42 #if !defined(AIX)
43 # include <sys/fcntl.h>
44 #endif
45 #if !defined(linux)
46 # include <sys/protosw.h>
47 #endif
48 #include <sys/socket.h>
49 #if defined(_KERNEL)
50 # include <sys/systm.h>
51 # if !defined(__SVR4) && !defined(__svr4__)
52 #  include <sys/mbuf.h>
53 # endif
54 #endif
55 #if defined(__SVR4) || defined(__svr4__)
56 # include <sys/filio.h>
57 # include <sys/byteorder.h>
58 # ifdef _KERNEL
59 #  include <sys/dditypes.h>
60 # endif
61 # include <sys/stream.h>
62 # include <sys/kmem.h>
63 #endif
64 #if __FreeBSD_version >= 300000
65 # include <sys/queue.h>
66 #endif
67 #include <net/if.h>
68 #if __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 #  include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81 
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
87 
88 #if !defined(linux)
89 # include <netinet/ip_var.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #include "netinet/ipf_stack.h"
102 #ifdef	IPFILTER_SYNC
103 #include "netinet/ip_sync.h"
104 #endif
105 #if (__FreeBSD_version >= 300000)
106 # include <sys/malloc.h>
107 #endif
108 /* END OF INCLUDES */
109 
110 #undef	SOCKADDR_IN
111 #define	SOCKADDR_IN	struct sockaddr_in
112 
113 #if !defined(lint)
114 static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
115 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
116 #endif
117 
118 
/* ======================================================================== */
/* How the NAT is organised and works.                                      */
/*                                                                          */
/* Inside (interface y) NAT       Outside (interface x)                     */
/* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
/* ------------>         |   ------------>                                  */
/* src=10.1.1.1          |   src=192.1.1.1                                  */
/*                       |                                                  */
/*                       |   in, processed by fr_checknatin() for x         */
/* <------------         |   <------------                                  */
/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
/* -------------------- -+- -------------------------------------           */
/* fr_checknatout() - changes ip_src and if required, sport                 */
/*             - creates a new mapping, if required.                        */
/* fr_checknatin()  - changes ip_dst and if required, dport                 */
/*                                                                          */
/* In the NAT table, internal source is recorded as "in" and externally     */
/* seen as "out".                                                           */
/* ======================================================================== */
139 
140 
141 static	int	nat_clearlist __P((ipf_stack_t *));
142 static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
143 static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
144 static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
145 static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
146 static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
147 static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
148 static	int	nat_match __P((fr_info_t *, ipnat_t *));
149 static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
150 static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
151 static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
152 				    struct in_addr, struct in_addr, u_32_t,
153 				    ipf_stack_t *));
154 static	INLINE	int nat_icmpquerytype4 __P((int));
155 static	int	nat_ruleaddrinit __P((ipnat_t *));
156 static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
157 static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
158 static	INLINE	int nat_icmperrortype4 __P((int));
159 static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
160 				      tcphdr_t *, nat_t **, int));
161 static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
162 static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
163 static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
164 static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
165 static	int	nat_flushtable __P((int, ipf_stack_t *));
166 
/*
 * Non-zero when NAT entry "n" has any of the IPN_TCPUDPICMP flags set and
 * its nat_inport and nat_outport fields differ.  "n" is evaluated more than
 * once, so it must not have side effects.
 */
#define	NAT_HAS_L4_CHANGED(n)	\
	(((n)->nat_flags & (IPN_TCPUDPICMP)) && \
	 ((n)->nat_inport != (n)->nat_outport))
170 
171 
172 /* ------------------------------------------------------------------------ */
173 /* Function:    fr_natinit                                                  */
174 /* Returns:     int - 0 == success, -1 == failure                           */
175 /* Parameters:  Nil                                                         */
176 /*                                                                          */
177 /* Initialise all of the NAT locks, tables and other structures.            */
178 /* ------------------------------------------------------------------------ */
fr_natinit(ifs)179 int fr_natinit(ifs)
180 ipf_stack_t *ifs;
181 {
182 	int i;
183 
184 	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
185 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
186 	if (ifs->ifs_nat_table[0] != NULL)
187 		bzero((char *)ifs->ifs_nat_table[0],
188 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
189 	else
190 		return -1;
191 
192 	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
193 		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
194 	if (ifs->ifs_nat_table[1] != NULL)
195 		bzero((char *)ifs->ifs_nat_table[1],
196 		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
197 	else
198 		return -2;
199 
200 	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
201 		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
202 	if (ifs->ifs_nat_rules != NULL)
203 		bzero((char *)ifs->ifs_nat_rules,
204 		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
205 	else
206 		return -3;
207 
208 	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
209 		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
210 	if (ifs->ifs_rdr_rules != NULL)
211 		bzero((char *)ifs->ifs_rdr_rules,
212 		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
213 	else
214 		return -4;
215 
216 	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
217 		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
218 	if (ifs->ifs_maptable != NULL)
219 		bzero((char *)ifs->ifs_maptable,
220 		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
221 	else
222 		return -5;
223 
224 	ifs->ifs_ipf_hm_maplist = NULL;
225 
226 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
227 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
228 	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
229 		return -1;
230 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
231 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
232 
233 	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
234 		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
235 	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
236 		return -1;
237 	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
238 	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
239 
240 	if (ifs->ifs_fr_nat_maxbucket == 0) {
241 		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
242 			ifs->ifs_fr_nat_maxbucket++;
243 		ifs->ifs_fr_nat_maxbucket *= 2;
244 	}
245 
246 	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
247 	/*
248 	 * Increase this because we may have "keep state" following this too
249 	 * and packet storms can occur if this is removed too quickly.
250 	 */
251 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
252 	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
253 	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
254 	ifs->ifs_nat_udptq.ifq_ref = 1;
255 	ifs->ifs_nat_udptq.ifq_head = NULL;
256 	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
257 	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
258 	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
259 	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
260 	ifs->ifs_nat_icmptq.ifq_ref = 1;
261 	ifs->ifs_nat_icmptq.ifq_head = NULL;
262 	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
263 	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
264 	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
265 	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
266 	ifs->ifs_nat_iptq.ifq_ref = 1;
267 	ifs->ifs_nat_iptq.ifq_head = NULL;
268 	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
269 	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
270 	ifs->ifs_nat_iptq.ifq_next = NULL;
271 
272 	for (i = 0; i < IPF_TCP_NSTATES; i++) {
273 		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
274 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
275 #ifdef LARGE_NAT
276 		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
277 			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
278 #endif
279 	}
280 
281 	/*
282 	 * Increase this because we may have "keep state" following
283 	 * this too and packet storms can occur if this is removed
284 	 * too quickly.
285 	 */
286 	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
287 	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
288 
289 	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
290 	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
291 	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
292 	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
293 
294 	ifs->ifs_fr_nat_init = 1;
295 	ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
296 	return 0;
297 }
298 
299 
300 /* ------------------------------------------------------------------------ */
301 /* Function:    nat_addrdr                                                  */
302 /* Returns:     Nil                                                         */
303 /* Parameters:  n(I) - pointer to NAT rule to add                           */
304 /*                                                                          */
305 /* Adds a redirect rule to the hash table of redirect rules and the list of */
306 /* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
307 /* use by redirect rules.                                                   */
308 /* ------------------------------------------------------------------------ */
nat_addrdr(n,ifs)309 static void nat_addrdr(n, ifs)
310 ipnat_t *n;
311 ipf_stack_t *ifs;
312 {
313 	ipnat_t **np;
314 	u_32_t j;
315 	u_int hv;
316 	int k;
317 
318 	k = count4bits(n->in_outmsk);
319 	if ((k >= 0) && (k != 32))
320 		ifs->ifs_rdr_masks |= 1 << k;
321 	j = (n->in_outip & n->in_outmsk);
322 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
323 	np = ifs->ifs_rdr_rules + hv;
324 	while (*np != NULL)
325 		np = &(*np)->in_rnext;
326 	n->in_rnext = NULL;
327 	n->in_prnext = np;
328 	n->in_hv = hv;
329 	*np = n;
330 }
331 
332 
333 /* ------------------------------------------------------------------------ */
334 /* Function:    nat_addnat                                                  */
335 /* Returns:     Nil                                                         */
336 /* Parameters:  n(I) - pointer to NAT rule to add                           */
337 /*                                                                          */
338 /* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
339 /* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
340 /* redirect rules.                                                          */
341 /* ------------------------------------------------------------------------ */
nat_addnat(n,ifs)342 static void nat_addnat(n, ifs)
343 ipnat_t *n;
344 ipf_stack_t *ifs;
345 {
346 	ipnat_t **np;
347 	u_32_t j;
348 	u_int hv;
349 	int k;
350 
351 	k = count4bits(n->in_inmsk);
352 	if ((k >= 0) && (k != 32))
353 		ifs->ifs_nat_masks |= 1 << k;
354 	j = (n->in_inip & n->in_inmsk);
355 	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
356 	np = ifs->ifs_nat_rules + hv;
357 	while (*np != NULL)
358 		np = &(*np)->in_mnext;
359 	n->in_mnext = NULL;
360 	n->in_pmnext = np;
361 	n->in_hv = hv;
362 	*np = n;
363 }
364 
365 
366 /* ------------------------------------------------------------------------ */
367 /* Function:    nat_delrdr                                                  */
368 /* Returns:     Nil                                                         */
369 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
370 /*                                                                          */
371 /* Removes a redirect rule from the hash table of redirect rules.           */
372 /* ------------------------------------------------------------------------ */
nat_delrdr(n)373 void nat_delrdr(n)
374 ipnat_t *n;
375 {
376 	if (n->in_rnext)
377 		n->in_rnext->in_prnext = n->in_prnext;
378 	*n->in_prnext = n->in_rnext;
379 }
380 
381 
382 /* ------------------------------------------------------------------------ */
383 /* Function:    nat_delnat                                                  */
384 /* Returns:     Nil                                                         */
385 /* Parameters:  n(I) - pointer to NAT rule to delete                        */
386 /*                                                                          */
387 /* Removes a NAT map rule from the hash table of NAT map rules.             */
388 /* ------------------------------------------------------------------------ */
nat_delnat(n)389 void nat_delnat(n)
390 ipnat_t *n;
391 {
392 	if (n->in_mnext != NULL)
393 		n->in_mnext->in_pmnext = n->in_pmnext;
394 	*n->in_pmnext = n->in_mnext;
395 }
396 
397 
398 /* ------------------------------------------------------------------------ */
399 /* Function:    nat_hostmap                                                 */
400 /* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
401 /*                                else a pointer to the hostmapping to use  */
402 /* Parameters:  np(I)   - pointer to NAT rule                               */
403 /*              real(I) - real IP address                                   */
404 /*              map(I)  - mapped IP address                                 */
405 /*              port(I) - destination port number                           */
406 /* Write Locks: ipf_nat                                                     */
407 /*                                                                          */
408 /* Check if an ip address has already been allocated for a given mapping    */
409 /* that is not doing port based translation.  If is not yet allocated, then */
410 /* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
411 /* ------------------------------------------------------------------------ */
nat_hostmap(np,src,dst,map,port,ifs)412 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
413 ipnat_t *np;
414 struct in_addr src;
415 struct in_addr dst;
416 struct in_addr map;
417 u_32_t port;
418 ipf_stack_t *ifs;
419 {
420 	hostmap_t *hm;
421 	u_int hv;
422 
423 	hv = (src.s_addr ^ dst.s_addr);
424 	hv += src.s_addr;
425 	hv += dst.s_addr;
426 	hv %= HOSTMAP_SIZE;
427 	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
428 		if ((hm->hm_srcip.s_addr == src.s_addr) &&
429 		    (hm->hm_dstip.s_addr == dst.s_addr) &&
430 		    ((np == NULL) || (np == hm->hm_ipnat)) &&
431 		    ((port == 0) || (port == hm->hm_port))) {
432 			hm->hm_ref++;
433 			return hm;
434 		}
435 
436 	if (np == NULL)
437 		return NULL;
438 
439 	KMALLOC(hm, hostmap_t *);
440 	if (hm) {
441 		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
442 		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
443 		if (ifs->ifs_ipf_hm_maplist != NULL)
444 			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
445 		ifs->ifs_ipf_hm_maplist = hm;
446 
447 		hm->hm_next = ifs->ifs_maptable[hv];
448 		hm->hm_pnext = ifs->ifs_maptable + hv;
449 		if (ifs->ifs_maptable[hv] != NULL)
450 			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
451 		ifs->ifs_maptable[hv] = hm;
452 		hm->hm_ipnat = np;
453 		hm->hm_srcip = src;
454 		hm->hm_dstip = dst;
455 		hm->hm_mapip = map;
456 		hm->hm_ref = 1;
457 		hm->hm_port = port;
458 		hm->hm_v = 4;
459 	}
460 	return hm;
461 }
462 
463 
464 /* ------------------------------------------------------------------------ */
465 /* Function:    fr_hostmapdel                                              */
466 /* Returns:     Nil                                                         */
467 /* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
468 /* Write Locks: ipf_nat                                                     */
469 /*                                                                          */
470 /* Decrement the references to this hostmap structure by one.  If this      */
471 /* reaches zero then remove it and free it.                                 */
472 /* ------------------------------------------------------------------------ */
fr_hostmapdel(hmp)473 void fr_hostmapdel(hmp)
474 struct hostmap **hmp;
475 {
476 	struct hostmap *hm;
477 
478 	hm = *hmp;
479 	*hmp = NULL;
480 
481 	hm->hm_ref--;
482 	if (hm->hm_ref == 0) {
483 		if (hm->hm_next)
484 			hm->hm_next->hm_pnext = hm->hm_pnext;
485 		*hm->hm_pnext = hm->hm_next;
486 		if (hm->hm_hnext)
487 			hm->hm_hnext->hm_phnext = hm->hm_phnext;
488 		*hm->hm_phnext = hm->hm_hnext;
489 		KFREE(hm);
490 	}
491 }
492 
493 
494 /* ------------------------------------------------------------------------ */
495 /* Function:    fix_outcksum                                                */
496 /* Returns:     Nil                                                         */
497 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
498 /*              n((I)  - amount to adjust checksum by                       */
499 /*                                                                          */
500 /* Adjusts the 16bit checksum by "n" for packets going out.                 */
501 /* ------------------------------------------------------------------------ */
fix_outcksum(sp,n)502 void fix_outcksum(sp, n)
503 u_short *sp;
504 u_32_t n;
505 {
506 	u_short sumshort;
507 	u_32_t sum1;
508 
509 	if (n == 0)
510 		return;
511 
512 	sum1 = (~ntohs(*sp)) & 0xffff;
513 	sum1 += (n);
514 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
515 	/* Again */
516 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
517 	sumshort = ~(u_short)sum1;
518 	*(sp) = htons(sumshort);
519 }
520 
521 
522 /* ------------------------------------------------------------------------ */
523 /* Function:    fix_incksum                                                 */
524 /* Returns:     Nil                                                         */
525 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
526 /*              n((I)  - amount to adjust checksum by                       */
527 /*                                                                          */
528 /* Adjusts the 16bit checksum by "n" for packets going in.                  */
529 /* ------------------------------------------------------------------------ */
fix_incksum(sp,n)530 void fix_incksum(sp, n)
531 u_short *sp;
532 u_32_t n;
533 {
534 	u_short sumshort;
535 	u_32_t sum1;
536 
537 	if (n == 0)
538 		return;
539 
540 	sum1 = (~ntohs(*sp)) & 0xffff;
541 	sum1 += ~(n) & 0xffff;
542 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
543 	/* Again */
544 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
545 	sumshort = ~(u_short)sum1;
546 	*(sp) = htons(sumshort);
547 }
548 
549 
550 /* ------------------------------------------------------------------------ */
551 /* Function:    fix_datacksum                                               */
552 /* Returns:     Nil                                                         */
553 /* Parameters:  sp(I)  - location of 16bit checksum to update               */
554 /*              n((I)  - amount to adjust checksum by                       */
555 /*                                                                          */
556 /* Fix_datacksum is used *only* for the adjustments of checksums in the     */
557 /* data section of an IP packet.                                            */
558 /*                                                                          */
559 /* The only situation in which you need to do this is when NAT'ing an       */
560 /* ICMP error message. Such a message, contains in its body the IP header   */
561 /* of the original IP packet, that causes the error.                        */
562 /*                                                                          */
563 /* You can't use fix_incksum or fix_outcksum in that case, because for the  */
564 /* kernel the data section of the ICMP error is just data, and no special   */
565 /* processing like hardware cksum or ntohs processing have been done by the */
566 /* kernel on the data section.                                              */
567 /* ------------------------------------------------------------------------ */
fix_datacksum(sp,n)568 void fix_datacksum(sp, n)
569 u_short *sp;
570 u_32_t n;
571 {
572 	u_short sumshort;
573 	u_32_t sum1;
574 
575 	if (n == 0)
576 		return;
577 
578 	sum1 = (~ntohs(*sp)) & 0xffff;
579 	sum1 += (n);
580 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
581 	/* Again */
582 	sum1 = (sum1 >> 16) + (sum1 & 0xffff);
583 	sumshort = ~(u_short)sum1;
584 	*(sp) = htons(sumshort);
585 }
586 
587 
588 /* ------------------------------------------------------------------------ */
589 /* Function:    fr_nat_ioctl                                                */
590 /* Returns:     int - 0 == success, != 0 == failure                         */
591 /* Parameters:  data(I) - pointer to ioctl data                             */
592 /*              cmd(I)  - ioctl command integer                             */
593 /*              mode(I) - file mode bits used with open                     */
594 /*              uid(I)  - uid of caller                                     */
595 /*              ctx(I)  - pointer to give the uid context                   */
596 /*              ifs     - ipf stack instance                                */
597 /*                                                                          */
598 /* Processes an ioctl call made to operate on the IP Filter NAT device.     */
599 /* ------------------------------------------------------------------------ */
fr_nat_ioctl(data,cmd,mode,uid,ctx,ifs)600 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
601 ioctlcmd_t cmd;
602 caddr_t data;
603 int mode, uid;
604 void *ctx;
605 ipf_stack_t *ifs;
606 {
607 	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
608 	int error = 0, ret, arg, getlock;
609 	ipnat_t natd;
610 
611 #if (BSD >= 199306) && defined(_KERNEL)
612 	if ((securelevel >= 2) && (mode & FWRITE))
613 		return EPERM;
614 #endif
615 
616 #if defined(__osf__) && defined(_KERNEL)
617 	getlock = 0;
618 #else
619 	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
620 #endif
621 
622 	nat = NULL;     /* XXX gcc -Wuninitialized */
623 	if (cmd == (ioctlcmd_t)SIOCADNAT) {
624 		KMALLOC(nt, ipnat_t *);
625 	} else {
626 		nt = NULL;
627 	}
628 
629 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
630 		if (mode & NAT_SYSSPACE) {
631 			bcopy(data, (char *)&natd, sizeof(natd));
632 			error = 0;
633 		} else {
634 			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
635 		}
636 
637 	}
638 
639 	if (error != 0)
640 		goto done;
641 
642 	/*
643 	 * For add/delete, look to see if the NAT entry is already present
644 	 */
645 	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
646 		nat = &natd;
647 		if (nat->in_v == 0)	/* For backward compat. */
648 			nat->in_v = 4;
649 		nat->in_flags &= IPN_USERFLAGS;
650 		if ((nat->in_redir & NAT_MAPBLK) == 0) {
651 			if ((nat->in_flags & IPN_SPLIT) == 0)
652 				nat->in_inip &= nat->in_inmsk;
653 			if ((nat->in_flags & IPN_IPRANGE) == 0)
654 				nat->in_outip &= nat->in_outmsk;
655 		}
656 		MUTEX_ENTER(&ifs->ifs_ipf_natio);
657 		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
658 		     np = &n->in_next)
659 			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
660 			    IPN_CMPSIZ) == 0) {
661 				if (nat->in_redir == NAT_REDIRECT &&
662 				    nat->in_pnext != n->in_pnext)
663 					continue;
664 				break;
665 			}
666 	}
667 
668 	switch (cmd)
669 	{
670 	case SIOCGENITER :
671 	    {
672 		ipfgeniter_t iter;
673 		ipftoken_t *token;
674 
675 		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
676 		if (error != 0)
677 			break;
678 
679 		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
680 		if (token != NULL)
681 			error  = nat_iterator(token, &iter, ifs);
682 		else
683 			error = ESRCH;
684 		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
685 		break;
686 	    }
687 #ifdef  IPFILTER_LOG
688 	case SIOCIPFFB :
689 	{
690 		int tmp;
691 
692 		if (!(mode & FWRITE))
693 			error = EPERM;
694 		else {
695 			tmp = ipflog_clear(IPL_LOGNAT, ifs);
696 			error = BCOPYOUT((char *)&tmp, (char *)data,
697 					sizeof(tmp));
698 			if (error != 0)
699 				error = EFAULT;
700 		}
701 		break;
702 	}
703 	case SIOCSETLG :
704 		if (!(mode & FWRITE)) {
705 			error = EPERM;
706 		} else {
707 			error = BCOPYIN((char *)data,
708 					(char *)&ifs->ifs_nat_logging,
709 					sizeof(ifs->ifs_nat_logging));
710 			if (error != 0)
711 				error = EFAULT;
712 		}
713 		break;
714 	case SIOCGETLG :
715 		error = BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
716 				sizeof(ifs->ifs_nat_logging));
717 		if (error != 0)
718 			error = EFAULT;
719 		break;
720 	case FIONREAD :
721 		arg = ifs->ifs_iplused[IPL_LOGNAT];
722 		error = BCOPYOUT(&arg, data, sizeof(arg));
723 		if (error != 0)
724 			error = EFAULT;
725 		break;
726 #endif
727 	case SIOCADNAT :
728 		if (!(mode & FWRITE)) {
729 			error = EPERM;
730 		} else if (n != NULL) {
731 			error = EEXIST;
732 		} else if (nt == NULL) {
733 			error = ENOMEM;
734 		}
735 		if (error != 0) {
736 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
737 			break;
738 		}
739 		bcopy((char *)nat, (char *)nt, sizeof(*n));
740 		error = nat_siocaddnat(nt, np, getlock, ifs);
741 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
742 		if (error == 0)
743 			nt = NULL;
744 		break;
745 	case SIOCRMNAT :
746 		if (!(mode & FWRITE)) {
747 			error = EPERM;
748 			n = NULL;
749 		} else if (n == NULL) {
750 			error = ESRCH;
751 		}
752 
753 		if (error != 0) {
754 			MUTEX_EXIT(&ifs->ifs_ipf_natio);
755 			break;
756 		}
757 		nat_siocdelnat(n, np, getlock, ifs);
758 
759 		MUTEX_EXIT(&ifs->ifs_ipf_natio);
760 		n = NULL;
761 		break;
762 	case SIOCGNATS :
763 		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
764 		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
765 		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
766 		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
767 		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
768 		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
769 		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
770 		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
771 		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
772 		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
773 		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
774 		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
775 		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
776 		break;
777 	case SIOCGNATL :
778 	    {
779 		natlookup_t nl;
780 
781 		if (getlock) {
782 			READ_ENTER(&ifs->ifs_ipf_nat);
783 		}
784 		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
785 		if (nl.nl_v != 6)
786 			nl.nl_v = 4;
787 		if (error == 0) {
788 			void *ptr;
789 
790 			switch (nl.nl_v)
791 			{
792 			case 4:
793 				ptr = nat_lookupredir(&nl, ifs);
794 				break;
795 #ifdef	USE_INET6
796 			case 6:
797 				ptr = nat6_lookupredir(&nl, ifs);
798 				break;
799 #endif
800 			default:
801 				ptr = NULL;
802 				break;
803 			}
804 
805 			if (ptr != NULL) {
806 				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
807 			} else {
808 				error = ESRCH;
809 			}
810 		}
811 		if (getlock) {
812 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
813 		}
814 		break;
815 	    }
816 	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
817 		if (!(mode & FWRITE)) {
818 			error = EPERM;
819 			break;
820 		}
821 		if (getlock) {
822 			WRITE_ENTER(&ifs->ifs_ipf_nat);
823 		}
824 		error = BCOPYIN(data, &arg, sizeof(arg));
825 		if (error != 0) {
826 			error = EFAULT;
827 		} else {
828 			if (arg == FLUSH_LIST)
829 				ret = nat_clearlist(ifs);
830 			else if (VALID_TABLE_FLUSH_OPT(arg))
831 				ret = nat_flushtable(arg, ifs);
832 			else
833 				error = EINVAL;
834 		}
835 		if (getlock) {
836 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
837 		}
838 		if (error == 0) {
839 			error = BCOPYOUT(&ret, data, sizeof(ret));
840 			if (error != 0)
841 				error = EFAULT;
842 		}
843 		break;
844 	case SIOCPROXY :
845 		error = appr_ioctl(data, cmd, mode, ifs);
846 		break;
847 	case SIOCSTLCK :
848 		if (!(mode & FWRITE)) {
849 			error = EPERM;
850 		} else {
851 			error = fr_lock(data, &ifs->ifs_fr_nat_lock);
852 		}
853 		break;
854 	case SIOCSTPUT :
855 		if ((mode & FWRITE) != 0) {
856 			error = fr_natputent(data, getlock, ifs);
857 		} else {
858 			error = EACCES;
859 		}
860 		break;
861 	case SIOCSTGSZ :
862 		if (ifs->ifs_fr_nat_lock) {
863 			if (getlock) {
864 				READ_ENTER(&ifs->ifs_ipf_nat);
865 			}
866 			error = fr_natgetsz(data, ifs);
867 			if (getlock) {
868 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
869 			}
870 		} else
871 			error = EACCES;
872 		break;
873 	case SIOCSTGET :
874 		if (ifs->ifs_fr_nat_lock) {
875 			if (getlock) {
876 				READ_ENTER(&ifs->ifs_ipf_nat);
877 			}
878 			error = fr_natgetent(data, ifs);
879 			if (getlock) {
880 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
881 			}
882 		} else
883 			error = EACCES;
884 		break;
885 	case SIOCIPFDELTOK :
886 		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
887 		if (error != 0) {
888 			error = EFAULT;
889 		} else {
890 			error = ipf_deltoken(arg, uid, ctx, ifs);
891 		}
892 		break;
893 	default :
894 		error = EINVAL;
895 		break;
896 	}
897 done:
898 	if (nt)
899 		KFREE(nt);
900 	return error;
901 }
902 
903 
904 /* ------------------------------------------------------------------------ */
905 /* Function:    nat_siocaddnat                                              */
906 /* Returns:     int - 0 == success, != 0 == failure                         */
907 /* Parameters:  n(I)       - pointer to new NAT rule                        */
908 /*              np(I)      - pointer to where to insert new NAT rule        */
909 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
910 /* Mutex Locks: ipf_natio                                                   */
911 /*                                                                          */
912 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
913 /* from information passed to the kernel, then add it  to the appropriate   */
914 /* NAT rule table(s).                                                       */
915 /* ------------------------------------------------------------------------ */
nat_siocaddnat(n,np,getlock,ifs)916 static int nat_siocaddnat(n, np, getlock, ifs)
917 ipnat_t *n, **np;
918 int getlock;
919 ipf_stack_t *ifs;
920 {
921 	int error = 0, i, j;
922 
923 	if (nat_resolverule(n, ifs) != 0)
924 		return ENOENT;
925 
926 	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
927 		return EINVAL;
928 
929 	n->in_use = 0;
930 	if (n->in_redir & NAT_MAPBLK)
931 		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
932 	else if (n->in_flags & IPN_AUTOPORTMAP)
933 		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
934 	else if (n->in_flags & IPN_IPRANGE)
935 		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
936 	else if (n->in_flags & IPN_SPLIT)
937 		n->in_space = 2;
938 	else if (n->in_outmsk != 0)
939 		n->in_space = ~ntohl(n->in_outmsk);
940 	else
941 		n->in_space = 1;
942 	if ((n->in_flags & NAT_TCPUDPICMPQ) && (n->in_redir != NAT_REDIRECT)) {
943 		if (ntohs(n->in_pmax) < ntohs(n->in_pmin))
944 			return EINVAL;
945 	}
946 
947 	/*
948 	 * Calculate the number of valid IP addresses in the output
949 	 * mapping range.  In all cases, the range is inclusive of
950 	 * the start and ending IP addresses.
951 	 * If to a CIDR address, lose 2: broadcast + network address
952 	 *                               (so subtract 1)
953 	 * If to a range, add one.
954 	 * If to a single IP address, set to 1.
955 	 */
956 	if (n->in_space) {
957 		if ((n->in_flags & IPN_IPRANGE) != 0)
958 			n->in_space += 1;
959 		else
960 			n->in_space -= 1;
961 	} else
962 		n->in_space = 1;
963 
964 #ifdef	USE_INET6
965 	if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
966 	    !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
967 		IP6_ADD(&n->in_out[0], 1, &n->in_next6)
968 	else if (n->in_v == 6 &&
969 	    (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
970 		n->in_next6 = n->in_in[0];
971 	else if (n->in_v == 6)
972 		n->in_next6 = n->in_out[0];
973 	else
974 #endif
975 	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
976 	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
977 		n->in_nip = ntohl(n->in_outip) + 1;
978 	else if ((n->in_flags & IPN_SPLIT) &&
979 		 (n->in_redir & NAT_REDIRECT))
980 		n->in_nip = ntohl(n->in_inip);
981 	else
982 		n->in_nip = ntohl(n->in_outip);
983 
984 	if (n->in_redir & NAT_MAP) {
985 		n->in_pnext = ntohs(n->in_pmin);
986 		/*
987 		 * Multiply by the number of ports made available.
988 		 */
989 		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
990 			n->in_space *= (ntohs(n->in_pmax) -
991 					ntohs(n->in_pmin) + 1);
992 			/*
993 			 * Because two different sources can map to
994 			 * different destinations but use the same
995 			 * local IP#/port #.
996 			 * If the result is smaller than in_space, then
997 			 * we may have wrapped around 32bits.
998 			 */
999 			i = n->in_inmsk;
1000 			if ((i != 0) && (i != 0xffffffff)) {
1001 				j = n->in_space * (~ntohl(i) + 1);
1002 				if (j >= n->in_space)
1003 					n->in_space = j;
1004 				else
1005 					n->in_space = 0xffffffff;
1006 			}
1007 		}
1008 		/*
1009 		 * If no protocol is specified, multiple by 256 to allow for
1010 		 * at least one IP:IP mapping per protocol.
1011 		 */
1012 		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1013 				j = n->in_space * 256;
1014 				if (j >= n->in_space)
1015 					n->in_space = j;
1016 				else
1017 					n->in_space = 0xffffffff;
1018 		}
1019 	}
1020 
1021 	/* Otherwise, these fields are preset */
1022 
1023 	if (getlock) {
1024 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1025 	}
1026 	n->in_next = NULL;
1027 	*np = n;
1028 
1029 	if (n->in_age[0] != 0)
1030 	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1031 						  n->in_age[0], ifs);
1032 
1033 	if (n->in_age[1] != 0)
1034 	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1035 						  n->in_age[1], ifs);
1036 
1037 	if (n->in_redir & NAT_REDIRECT) {
1038 		n->in_flags &= ~IPN_NOTDST;
1039 		switch (n->in_v)
1040 		{
1041 		case 4 :
1042 			nat_addrdr(n, ifs);
1043 			break;
1044 #ifdef	USE_INET6
1045 		case 6 :
1046 			nat6_addrdr(n, ifs);
1047 			break;
1048 #endif
1049 		default :
1050 			break;
1051 		}
1052 	}
1053 	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1054 		n->in_flags &= ~IPN_NOTSRC;
1055 		switch (n->in_v)
1056 		{
1057 		case 4 :
1058 			nat_addnat(n, ifs);
1059 			break;
1060 #ifdef	USE_INET6
1061 		case 6 :
1062 			nat6_addnat(n, ifs);
1063 			break;
1064 #endif
1065 		default :
1066 			break;
1067 		}
1068 	}
1069 	n = NULL;
1070 	ifs->ifs_nat_stats.ns_rules++;
1071 	if (getlock) {
1072 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1073 	}
1074 
1075 	return error;
1076 }
1077 
1078 
1079 /* ------------------------------------------------------------------------ */
1080 /* Function:    nat_resolvrule                                              */
1081 /* Returns:     int - 0 == success, -1 == failure                           */
1082 /* Parameters:  n(I)  - pointer to NAT rule                                 */
1083 /*                                                                          */
1084 /* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1085 /* any specified interfaces and proxy labels, and determines whether or not */
1086 /* all proxy labels are correctly specified.				    */
1087 /*									    */
1088 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1089 /* ------------------------------------------------------------------------ */
nat_resolverule(n,ifs)1090 static int nat_resolverule(n, ifs)
1091 ipnat_t *n;
1092 ipf_stack_t *ifs;
1093 {
1094 	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1095 	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1096 
1097 	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1098 	if (n->in_ifnames[1][0] == '\0') {
1099 		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1100 		n->in_ifps[1] = n->in_ifps[0];
1101 	} else {
1102 		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1103 	}
1104 
1105 	if (n->in_plabel[0] != '\0') {
1106 		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1107 		if (n->in_apr == NULL)
1108 			return -1;
1109 	}
1110 	return 0;
1111 }
1112 
1113 
1114 /* ------------------------------------------------------------------------ */
1115 /* Function:    nat_siocdelnat                                              */
1116 /* Returns:     int - 0 == success, != 0 == failure                         */
1117 /* Parameters:  n(I)       - pointer to new NAT rule                        */
1118 /*              np(I)      - pointer to where to insert new NAT rule        */
1119 /*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1120 /* Mutex Locks: ipf_natio                                                   */
1121 /*                                                                          */
1122 /* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1123 /* from information passed to the kernel, then add it  to the appropriate   */
1124 /* NAT rule table(s).                                                       */
1125 /* ------------------------------------------------------------------------ */
nat_siocdelnat(n,np,getlock,ifs)1126 static void nat_siocdelnat(n, np, getlock, ifs)
1127 ipnat_t *n, **np;
1128 int getlock;
1129 ipf_stack_t *ifs;
1130 {
1131 	int i;
1132 
1133 	if (getlock) {
1134 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1135 	}
1136 	if (n->in_redir & NAT_REDIRECT)
1137 		nat_delrdr(n);
1138 	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1139 		nat_delnat(n);
1140 	if (ifs->ifs_nat_list == NULL) {
1141 		ifs->ifs_nat_masks = 0;
1142 		ifs->ifs_rdr_masks = 0;
1143 		for (i = 0; i < 4; i++) {
1144 			ifs->ifs_nat6_masks[i] = 0;
1145 			ifs->ifs_rdr6_masks[i] = 0;
1146 		}
1147 	}
1148 
1149 	if (n->in_tqehead[0] != NULL) {
1150 		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1151 			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1152 		}
1153 	}
1154 
1155 	if (n->in_tqehead[1] != NULL) {
1156 		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1157 			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1158 		}
1159 	}
1160 
1161 	*np = n->in_next;
1162 
1163 	if (n->in_use == 0) {
1164 		if (n->in_apr)
1165 			appr_free(n->in_apr);
1166 		KFREE(n);
1167 		ifs->ifs_nat_stats.ns_rules--;
1168 	} else {
1169 		n->in_flags |= IPN_DELETE;
1170 		n->in_next = NULL;
1171 	}
1172 	if (getlock) {
1173 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1174 	}
1175 }
1176 
1177 
1178 /* ------------------------------------------------------------------------ */
1179 /* Function:    fr_natgetsz                                                 */
1180 /* Returns:     int - 0 == success, != 0 is the error value.                */
1181 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1182 /*                        get the size of.                                  */
1183 /*                                                                          */
1184 /* Handle SIOCSTGSZ.                                                        */
1185 /* Return the size of the nat list entry to be copied back to user space.   */
1186 /* The size of the entry is stored in the ng_sz field and the enture natget */
1187 /* structure is copied back to the user.                                    */
1188 /* ------------------------------------------------------------------------ */
fr_natgetsz(data,ifs)1189 static int fr_natgetsz(data, ifs)
1190 caddr_t data;
1191 ipf_stack_t *ifs;
1192 {
1193 	ap_session_t *aps;
1194 	nat_t *nat, *n;
1195 	natget_t ng;
1196 	int err;
1197 
1198 	err = BCOPYIN(data, &ng, sizeof(ng));
1199 	if (err != 0)
1200 		return EFAULT;
1201 
1202 	nat = ng.ng_ptr;
1203 	if (!nat) {
1204 		nat = ifs->ifs_nat_instances;
1205 		ng.ng_sz = 0;
1206 		/*
1207 		 * Empty list so the size returned is 0.  Simple.
1208 		 */
1209 		if (nat == NULL) {
1210 			err = BCOPYOUT(&ng, data, sizeof(ng));
1211 			if (err != 0) {
1212 				return EFAULT;
1213 			} else {
1214 				return 0;
1215 			}
1216 		}
1217 	} else {
1218 		/*
1219 		 * Make sure the pointer we're copying from exists in the
1220 		 * current list of entries.  Security precaution to prevent
1221 		 * copying of random kernel data.
1222 		 */
1223 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1224 			if (n == nat)
1225 				break;
1226 		if (!n)
1227 			return ESRCH;
1228 	}
1229 
1230 	/*
1231 	 * Incluse any space required for proxy data structures.
1232 	 */
1233 	ng.ng_sz = sizeof(nat_save_t);
1234 	aps = nat->nat_aps;
1235 	if (aps != NULL) {
1236 		ng.ng_sz += sizeof(ap_session_t) - 4;
1237 		if (aps->aps_data != 0)
1238 			ng.ng_sz += aps->aps_psiz;
1239 	}
1240 
1241 	err = BCOPYOUT(&ng, data, sizeof(ng));
1242 	if (err != 0)
1243 		return EFAULT;
1244 	return 0;
1245 }
1246 
1247 
1248 /* ------------------------------------------------------------------------ */
1249 /* Function:    fr_natgetent                                                */
1250 /* Returns:     int - 0 == success, != 0 is the error value.                */
1251 /* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1252 /*                        to NAT structure to copy out.                     */
1253 /*                                                                          */
1254 /* Handle SIOCSTGET.                                                        */
1255 /* Copies out NAT entry to user space.  Any additional data held for a      */
1256 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1257 /* ------------------------------------------------------------------------ */
fr_natgetent(data,ifs)1258 static int fr_natgetent(data, ifs)
1259 caddr_t data;
1260 ipf_stack_t *ifs;
1261 {
1262 	int error, outsize;
1263 	ap_session_t *aps;
1264 	nat_save_t *ipn, ipns;
1265 	nat_t *n, *nat;
1266 
1267 	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1268 	if (error != 0)
1269 		return error;
1270 
1271 	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1272 		return EINVAL;
1273 
1274 	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1275 	if (ipn == NULL)
1276 		return ENOMEM;
1277 
1278 	ipn->ipn_dsize = ipns.ipn_dsize;
1279 	nat = ipns.ipn_next;
1280 	if (nat == NULL) {
1281 		nat = ifs->ifs_nat_instances;
1282 		if (nat == NULL) {
1283 			if (ifs->ifs_nat_instances == NULL)
1284 				error = ENOENT;
1285 			goto finished;
1286 		}
1287 	} else {
1288 		/*
1289 		 * Make sure the pointer we're copying from exists in the
1290 		 * current list of entries.  Security precaution to prevent
1291 		 * copying of random kernel data.
1292 		 */
1293 		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1294 			if (n == nat)
1295 				break;
1296 		if (n == NULL) {
1297 			error = ESRCH;
1298 			goto finished;
1299 		}
1300 	}
1301 	ipn->ipn_next = nat->nat_next;
1302 
1303 	/*
1304 	 * Copy the NAT structure.
1305 	 */
1306 	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1307 
1308 	/*
1309 	 * If we have a pointer to the NAT rule it belongs to, save that too.
1310 	 */
1311 	if (nat->nat_ptr != NULL)
1312 		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1313 		      sizeof(ipn->ipn_ipnat));
1314 
1315 	/*
1316 	 * If we also know the NAT entry has an associated filter rule,
1317 	 * save that too.
1318 	 */
1319 	if (nat->nat_fr != NULL)
1320 		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1321 		      sizeof(ipn->ipn_fr));
1322 
1323 	/*
1324 	 * Last but not least, if there is an application proxy session set
1325 	 * up for this NAT entry, then copy that out too, including any
1326 	 * private data saved along side it by the proxy.
1327 	 */
1328 	aps = nat->nat_aps;
1329 	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1330 	if (aps != NULL) {
1331 		char *s;
1332 
1333 		if (outsize < sizeof(*aps)) {
1334 			error = ENOBUFS;
1335 			goto finished;
1336 		}
1337 
1338 		s = ipn->ipn_data;
1339 		bcopy((char *)aps, s, sizeof(*aps));
1340 		s += sizeof(*aps);
1341 		outsize -= sizeof(*aps);
1342 		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1343 			bcopy(aps->aps_data, s, aps->aps_psiz);
1344 		else
1345 			error = ENOBUFS;
1346 	}
1347 	if (error == 0) {
1348 		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1349 	}
1350 
1351 finished:
1352 	if (ipn != NULL) {
1353 		KFREES(ipn, ipns.ipn_dsize);
1354 	}
1355 	return error;
1356 }
1357 
1358 /* ------------------------------------------------------------------------ */
1359 /* Function:    nat_calc_chksum_diffs					    */
1360 /* Returns:     void							    */
1361 /* Parameters:  nat	-	pointer to NAT table entry		    */
1362 /*                                                                          */
1363 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1364 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1365 /* we are dealing with partial chksum offload. For these cases we need to   */
1366 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1367 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1368 /* nat_sumd[0]. 							    */
1369 /*									    */
1370 /* The function accepts initialized NAT table entry and computes the deltas */
1371 /* from nat_inip/nat_outip members. The function is called right before	    */
1372 /* the new entry is inserted into the table.				    */
1373 /*									    */
1374 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1375 /* of delta between original and new IP addresses.			    */
1376 /*									    */
1377 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1378 /* a chkusm of delta between original an new IP addrress:port tupples.	    */
1379 /*									    */
1380 /* Some facts about chksum, we should remember:				    */
1381 /*	IP header chksum covers IP header only				    */
1382 /*									    */
1383 /*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1384 /*		SRC, DST IP address					    */
1385 /*		SRC, DST Port						    */
1386 /*		length of payload					    */
1387 /*									    */
1388 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1389 /* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1390 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1391 /* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1392 /* stored along with other IP packet data in dblk_t structure and used in   */
1393 /* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1394 /*									    */
1395 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1396 /* of delta between new and orig address. NOTE: the order of operands for   */
1397 /* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1398 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1399 /*									    */
1400 /* ------------------------------------------------------------------------ */
nat_calc_chksum_diffs(nat)1401 void nat_calc_chksum_diffs(nat)
1402 nat_t *nat;
1403 {
1404 	u_32_t	sum_orig = 0;
1405 	u_32_t	sum_changed = 0;
1406 	u_32_t	sumd;
1407 	u_32_t	ipsum_orig = 0;
1408 	u_32_t	ipsum_changed = 0;
1409 
1410 	if (nat->nat_v != 4 && nat->nat_v != 6)
1411 		return;
1412 
1413 	/*
1414 	 * the switch calculates operands for CALC_SUMD(),
1415 	 * which will compute the partial chksum delta.
1416 	 */
1417 	switch (nat->nat_dir)
1418 	{
1419 	case NAT_INBOUND:
1420 		/*
1421 		 * we are dealing with RDR rule (DST address gets
1422 		 * modified on packet from client)
1423 		 */
1424 		if (nat->nat_v == 4) {
1425 			sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1426 			sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1427 		} else {
1428 			sum_changed = LONG_SUM6(&nat->nat_inip6);
1429 			sum_orig = LONG_SUM6(&nat->nat_outip6);
1430 		}
1431 		break;
1432 	case NAT_OUTBOUND:
1433 		/*
1434 		 * we are dealing with MAP rule (SRC address gets
1435 		 * modified on packet from client)
1436 		 */
1437 		if (nat->nat_v == 4) {
1438 			sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1439 			sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1440 		} else {
1441 			sum_changed = LONG_SUM6(&nat->nat_outip6);
1442 			sum_orig = LONG_SUM6(&nat->nat_inip6);
1443 		}
1444 		break;
1445 	default: ;
1446 		break;
1447 	}
1448 
1449 	/*
1450 	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1451 	 * calculation, which happens at the end of function.
1452 	 */
1453 	ipsum_changed = sum_changed;
1454 	ipsum_orig = sum_orig;
1455 	/*
1456 	 * NOTE: the order of operands for partial chksum adjustment
1457 	 * computation has to be swapped!
1458 	 */
1459 	CALC_SUMD(sum_changed, sum_orig, sumd);
1460 	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1461 
1462 	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1463 
1464 		/*
1465 		 * switch calculates operands for CALC_SUMD(), which will
1466 		 * compute the full chksum delta.
1467 		 */
1468 		switch (nat->nat_dir)
1469 		{
1470 		case NAT_INBOUND:
1471 			if (nat->nat_v == 4) {
1472 				sum_changed = LONG_SUM(
1473 				    ntohl(nat->nat_inip.s_addr) +
1474 				    ntohs(nat->nat_inport));
1475 				sum_orig = LONG_SUM(
1476 				    ntohl(nat->nat_outip.s_addr) +
1477 				    ntohs(nat->nat_outport));
1478 			} else {
1479 				sum_changed = LONG_SUM6(&nat->nat_inip6) +
1480 				    ntohs(nat->nat_inport);
1481 				sum_orig = LONG_SUM6(&nat->nat_outip6) +
1482 				    ntohs(nat->nat_outport);
1483 			}
1484 			break;
1485 		case NAT_OUTBOUND:
1486 			if (nat->nat_v == 4) {
1487 				sum_changed = LONG_SUM(
1488 				    ntohl(nat->nat_outip.s_addr) +
1489 				    ntohs(nat->nat_outport));
1490 				sum_orig = LONG_SUM(
1491 				    ntohl(nat->nat_inip.s_addr) +
1492 				    ntohs(nat->nat_inport));
1493 			} else {
1494 				sum_changed = LONG_SUM6(&nat->nat_outip6) +
1495 				    ntohs(nat->nat_outport);
1496 				sum_orig = LONG_SUM6(&nat->nat_inip6) +
1497 				    ntohs(nat->nat_inport);
1498 			}
1499 			break;
1500 		default: ;
1501 			break;
1502 		}
1503 
1504 		CALC_SUMD(sum_orig, sum_changed, sumd);
1505 		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1506 
1507 		if (!(nat->nat_flags & IPN_TCPUDP)) {
1508 			/*
1509 			 * partial HW chksum offload works for TCP/UDP headers only,
1510 			 * so we need to enforce full chksum adjustment for ICMP
1511 			 */
1512 			nat->nat_sumd[1] = nat->nat_sumd[0];
1513 		}
1514 	}
1515 	else
1516 		nat->nat_sumd[0] = nat->nat_sumd[1];
1517 
1518 	/*
1519 	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1520 	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1521 	 */
1522 	if (nat->nat_v == 4) {
1523 		if (NAT_HAS_L4_CHANGED(nat)) {
1524 			/*
1525 			 * bad luck, NAT changes also the L4 header, use IP
1526 			 * addresses to compute chksum adjustment for IP header.
1527 			 */
1528 			CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1529 			nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1530 		} else {
1531 			/*
1532 			 * the NAT does not change L4 hdr -> reuse chksum
1533 			 * adjustment for IP hdr.
1534 			 */
1535 			nat->nat_ipsumd = nat->nat_sumd[0];
1536 
1537 			/*
1538 			 * if L4 header does not use chksum - zero out deltas
1539 			 */
1540 			if (!(nat->nat_flags & IPN_TCPUDP)) {
1541 				nat->nat_sumd[0] = 0;
1542 				nat->nat_sumd[1] = 0;
1543 			}
1544 		}
1545 	}
1546 
1547 	return;
1548 }
1549 
1550 /* ------------------------------------------------------------------------ */
1551 /* Function:    fr_natputent                                                */
1552 /* Returns:     int - 0 == success, != 0 is the error value.                */
1553 /* Parameters:  data(I)    - pointer to natget structure with NAT           */
1554 /*                           structure information to load into the kernel  */
1555 /*              getlock(I) - flag indicating whether or not a write lock    */
1556 /*                           on ipf_nat is already held.                    */
1557 /*              ifs        - ipf stack instance                             */
1558 /*                                                                          */
1559 /* Handle SIOCSTPUT.                                                        */
1560 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1561 /* firewall rule data structures, if pointers to them indicate so.          */
1562 /* ------------------------------------------------------------------------ */
fr_natputent(data,getlock,ifs)1563 static int fr_natputent(data, getlock, ifs)
1564 caddr_t data;
1565 int getlock;
1566 ipf_stack_t *ifs;
1567 {
1568 	nat_save_t ipn, *ipnn;
1569 	ap_session_t *aps;
1570 	nat_t *n, *nat;
1571 	frentry_t *fr;
1572 	fr_info_t fin;
1573 	ipnat_t *in;
1574 	int error;
1575 
1576 	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1577 	if (error != 0)
1578 		return error;
1579 
1580 	/*
1581 	 * Trigger automatic call to nat_flushtable() if the
1582 	 * table has reached capcity specified by hi watermark.
1583 	 */
1584 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
1585 		ifs->ifs_nat_doflush = 1;
1586 
1587 	/*
1588 	 * If automatic flushing did not do its job, and the table
1589 	 * has filled up, don't try to create a new entry.
1590 	 */
1591 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
1592 		ifs->ifs_nat_stats.ns_memfail++;
1593 		return ENOMEM;
1594 	}
1595 
1596 	/*
1597 	 * Initialise early because of code at junkput label.
1598 	 */
1599 	in = NULL;
1600 	aps = NULL;
1601 	nat = NULL;
1602 	ipnn = NULL;
1603 
1604 	/*
1605 	 * New entry, copy in the rest of the NAT entry if it's size is more
1606 	 * than just the nat_t structure.
1607 	 */
1608 	fr = NULL;
1609 	if (ipn.ipn_dsize > sizeof(ipn)) {
1610 		if (ipn.ipn_dsize > 81920) {
1611 			error = ENOMEM;
1612 			goto junkput;
1613 		}
1614 
1615 		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1616 		if (ipnn == NULL)
1617 			return ENOMEM;
1618 
1619 		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1620 		if (error != 0) {
1621 			error = EFAULT;
1622 			goto junkput;
1623 		}
1624 	} else
1625 		ipnn = &ipn;
1626 
1627 	KMALLOC(nat, nat_t *);
1628 	if (nat == NULL) {
1629 		error = ENOMEM;
1630 		goto junkput;
1631 	}
1632 
1633 	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1634 	/*
1635 	 * Initialize all these so that nat_delete() doesn't cause a crash.
1636 	 */
1637 	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1638 	nat->nat_tqe.tqe_pnext = NULL;
1639 	nat->nat_tqe.tqe_next = NULL;
1640 	nat->nat_tqe.tqe_ifq = NULL;
1641 	nat->nat_tqe.tqe_parent = nat;
1642 
1643 	/*
1644 	 * Restore the rule associated with this nat session
1645 	 */
1646 	in = ipnn->ipn_nat.nat_ptr;
1647 	if (in != NULL) {
1648 		KMALLOC(in, ipnat_t *);
1649 		nat->nat_ptr = in;
1650 		if (in == NULL) {
1651 			error = ENOMEM;
1652 			goto junkput;
1653 		}
1654 		bzero((char *)in, offsetof(struct ipnat, in_next6));
1655 		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1656 		in->in_use = 1;
1657 		in->in_flags |= IPN_DELETE;
1658 
1659 		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1660 
1661 		if (nat_resolverule(in, ifs) != 0) {
1662 			error = ESRCH;
1663 			goto junkput;
1664 		}
1665 	}
1666 
1667 	/*
1668 	 * Check that the NAT entry doesn't already exist in the kernel.
1669 	 */
1670 	if (nat->nat_v != 6)
1671 		nat->nat_v = 4;
1672 	bzero((char *)&fin, sizeof(fin));
1673 	fin.fin_p = nat->nat_p;
1674 	fin.fin_ifs = ifs;
1675 	if (nat->nat_dir == NAT_OUTBOUND) {
1676 		fin.fin_data[0] = ntohs(nat->nat_oport);
1677 		fin.fin_data[1] = ntohs(nat->nat_outport);
1678 		fin.fin_ifp = nat->nat_ifps[0];
1679 		if (getlock) {
1680 			READ_ENTER(&ifs->ifs_ipf_nat);
1681 		}
1682 
1683 		switch (nat->nat_v)
1684 		{
1685 		case 4:
1686 			fin.fin_v = nat->nat_v;
1687 			n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1688 			    nat->nat_oip, nat->nat_outip);
1689 			break;
1690 #ifdef USE_INET6
1691 		case 6:
1692 			n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1693 			    &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1694 			break;
1695 #endif
1696 		default:
1697 			n = NULL;
1698 			break;
1699 		}
1700 
1701 		if (getlock) {
1702 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1703 		}
1704 		if (n != NULL) {
1705 			error = EEXIST;
1706 			goto junkput;
1707 		}
1708 	} else if (nat->nat_dir == NAT_INBOUND) {
1709 		fin.fin_data[0] = ntohs(nat->nat_inport);
1710 		fin.fin_data[1] = ntohs(nat->nat_oport);
1711 		fin.fin_ifp = nat->nat_ifps[1];
1712 		if (getlock) {
1713 			READ_ENTER(&ifs->ifs_ipf_nat);
1714 		}
1715 
1716 		switch (nat->nat_v)
1717 		{
1718 		case 4:
1719 			n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1720 			    nat->nat_inip, nat->nat_oip);
1721 			break;
1722 #ifdef USE_INET6
1723 		case 6:
1724 			n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1725 			    &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1726 			break;
1727 #endif
1728 		default:
1729 			n = NULL;
1730 			break;
1731 		}
1732 
1733 		if (getlock) {
1734 			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1735 		}
1736 		if (n != NULL) {
1737 			error = EEXIST;
1738 			goto junkput;
1739 		}
1740 	} else {
1741 		error = EINVAL;
1742 		goto junkput;
1743 	}
1744 
1745 	/*
1746 	 * Restore ap_session_t structure.  Include the private data allocated
1747 	 * if it was there.
1748 	 */
1749 	aps = nat->nat_aps;
1750 	if (aps != NULL) {
1751 		KMALLOC(aps, ap_session_t *);
1752 		nat->nat_aps = aps;
1753 		if (aps == NULL) {
1754 			error = ENOMEM;
1755 			goto junkput;
1756 		}
1757 		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1758 		if (in != NULL)
1759 			aps->aps_apr = in->in_apr;
1760 		else
1761 			aps->aps_apr = NULL;
1762 		if (aps->aps_psiz != 0) {
1763 			if (aps->aps_psiz > 81920) {
1764 				error = ENOMEM;
1765 				goto junkput;
1766 			}
1767 			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1768 			if (aps->aps_data == NULL) {
1769 				error = ENOMEM;
1770 				goto junkput;
1771 			}
1772 			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1773 			      aps->aps_psiz);
1774 		} else {
1775 			aps->aps_psiz = 0;
1776 			aps->aps_data = NULL;
1777 		}
1778 	}
1779 
1780 	/*
1781 	 * If there was a filtering rule associated with this entry then
1782 	 * build up a new one.
1783 	 */
1784 	fr = nat->nat_fr;
1785 	if (fr != NULL) {
1786 		if ((nat->nat_flags & SI_NEWFR) != 0) {
1787 			KMALLOC(fr, frentry_t *);
1788 			nat->nat_fr = fr;
1789 			if (fr == NULL) {
1790 				error = ENOMEM;
1791 				goto junkput;
1792 			}
1793 			ipnn->ipn_nat.nat_fr = fr;
1794 			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1795 			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1796 
1797 			fr->fr_ref = 1;
1798 			fr->fr_dsize = 0;
1799 			fr->fr_data = NULL;
1800 			fr->fr_type = FR_T_NONE;
1801 
1802 			MUTEX_NUKE(&fr->fr_lock);
1803 			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1804 		} else {
1805 			if (getlock) {
1806 				READ_ENTER(&ifs->ifs_ipf_nat);
1807 			}
1808 			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1809 				if (n->nat_fr == fr)
1810 					break;
1811 
1812 			if (n != NULL) {
1813 				MUTEX_ENTER(&fr->fr_lock);
1814 				fr->fr_ref++;
1815 				MUTEX_EXIT(&fr->fr_lock);
1816 			}
1817 			if (getlock) {
1818 				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1819 			}
1820 			if (!n) {
1821 				error = ESRCH;
1822 				goto junkput;
1823 			}
1824 		}
1825 	}
1826 
1827 	if (ipnn != &ipn) {
1828 		KFREES(ipnn, ipn.ipn_dsize);
1829 		ipnn = NULL;
1830 	}
1831 
1832 	nat_calc_chksum_diffs(nat);
1833 
1834 	if (getlock) {
1835 		WRITE_ENTER(&ifs->ifs_ipf_nat);
1836 	}
1837 
1838 	nat_calc_chksum_diffs(nat);
1839 
1840 	switch (nat->nat_v)
1841 	{
1842 	case 4 :
1843 		error = nat_insert(nat, nat->nat_rev, ifs);
1844 		break;
1845 #ifdef USE_INET6
1846 	case 6 :
1847 		error = nat6_insert(nat, nat->nat_rev, ifs);
1848 		break;
1849 #endif
1850 	default :
1851 		break;
1852 	}
1853 
1854 	if ((error == 0) && (aps != NULL)) {
1855 		aps->aps_next = ifs->ifs_ap_sess_list;
1856 		ifs->ifs_ap_sess_list = aps;
1857 	}
1858 	if (getlock) {
1859 		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1860 	}
1861 
1862 	if (error == 0)
1863 		return 0;
1864 
1865 	error = ENOMEM;
1866 
1867 junkput:
1868 	if (fr != NULL)
1869 		(void) fr_derefrule(&fr, ifs);
1870 
1871 	if ((ipnn != NULL) && (ipnn != &ipn)) {
1872 		KFREES(ipnn, ipn.ipn_dsize);
1873 	}
1874 	if (nat != NULL) {
1875 		if (aps != NULL) {
1876 			if (aps->aps_data != NULL) {
1877 				KFREES(aps->aps_data, aps->aps_psiz);
1878 			}
1879 			KFREE(aps);
1880 		}
1881 		if (in != NULL) {
1882 			if (in->in_apr)
1883 				appr_free(in->in_apr);
1884 			KFREE(in);
1885 		}
1886 		KFREE(nat);
1887 	}
1888 	return error;
1889 }
1890 
1891 
1892 /* ------------------------------------------------------------------------ */
1893 /* Function:    nat_delete                                                  */
1894 /* Returns:     int	- 0 if entry deleted. Otherwise, ref count on entry */
1895 /* Parameters:  nat	- pointer to the NAT entry to delete		    */
1896 /*		logtype	- type of LOG record to create before deleting	    */
1897 /*		ifs	- ipf stack instance				    */
1898 /* Write Lock:  ipf_nat                                                     */
1899 /*                                                                          */
1900 /* Delete a nat entry from the various lists and table.  If NAT logging is  */
1901 /* enabled then generate a NAT log record for this event.                   */
1902 /* ------------------------------------------------------------------------ */
nat_delete(nat,logtype,ifs)1903 int nat_delete(nat, logtype, ifs)
1904 struct nat *nat;
1905 int logtype;
1906 ipf_stack_t *ifs;
1907 {
1908 	struct ipnat *ipn;
1909 	int removed = 0;
1910 
1911 	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1912 		nat_log(nat, logtype, ifs);
1913 
1914 	/*
1915 	 * Start by removing the entry from the hash table of nat entries
1916 	 * so it will not be "used" again.
1917 	 *
1918 	 * It will remain in the "list" of nat entries until all references
1919 	 * have been accounted for.
1920 	 */
1921 	if ((nat->nat_phnext[0] != NULL) && (nat->nat_phnext[1] != NULL)) {
1922 		removed = 1;
1923 
1924 		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1925 		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1926 
1927 		*nat->nat_phnext[0] = nat->nat_hnext[0];
1928 		if (nat->nat_hnext[0] != NULL) {
1929 			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1930 			nat->nat_hnext[0] = NULL;
1931 		}
1932 		nat->nat_phnext[0] = NULL;
1933 
1934 		*nat->nat_phnext[1] = nat->nat_hnext[1];
1935 		if (nat->nat_hnext[1] != NULL) {
1936 			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1937 			nat->nat_hnext[1] = NULL;
1938 		}
1939 		nat->nat_phnext[1] = NULL;
1940 
1941 		if ((nat->nat_flags & SI_WILDP) != 0)
1942 			ifs->ifs_nat_stats.ns_wilds--;
1943 	}
1944 
1945 	/*
1946 	 * Next, remove it from the timeout queue it is in.
1947 	 */
1948 	fr_deletequeueentry(&nat->nat_tqe);
1949 
1950 	if (nat->nat_me != NULL) {
1951 		*nat->nat_me = NULL;
1952 		nat->nat_me = NULL;
1953 	}
1954 
1955 	MUTEX_ENTER(&nat->nat_lock);
1956  	if (nat->nat_ref > 1) {
1957 		nat->nat_ref--;
1958 		MUTEX_EXIT(&nat->nat_lock);
1959  		if (removed)
1960  			ifs->ifs_nat_stats.ns_orphans++;
1961 		return (nat->nat_ref);
1962 	}
1963 	MUTEX_EXIT(&nat->nat_lock);
1964 
1965 	nat->nat_ref = 0;
1966 
1967 	/*
1968 	 * If entry had already been removed,
1969 	 * it means we're cleaning up an orphan.
1970 	 */
1971  	if (!removed)
1972  		ifs->ifs_nat_stats.ns_orphans--;
1973 
1974 #ifdef	IPFILTER_SYNC
1975 	if (nat->nat_sync)
1976 		ipfsync_del(nat->nat_sync);
1977 #endif
1978 
1979 	/*
1980 	 * Now remove it from master list of nat table entries
1981 	 */
1982 	if (nat->nat_pnext != NULL) {
1983 		*nat->nat_pnext = nat->nat_next;
1984 		if (nat->nat_next != NULL) {
1985 			nat->nat_next->nat_pnext = nat->nat_pnext;
1986 			nat->nat_next = NULL;
1987 		}
1988 		nat->nat_pnext = NULL;
1989 	}
1990 
1991 	if (nat->nat_fr != NULL)
1992 		(void)fr_derefrule(&nat->nat_fr, ifs);
1993 
1994 	if (nat->nat_hm != NULL)
1995 		fr_hostmapdel(&nat->nat_hm);
1996 
1997 	/*
1998 	 * If there is an active reference from the nat entry to its parent
1999 	 * rule, decrement the rule's reference count and free it too if no
2000 	 * longer being used.
2001 	 */
2002 	ipn = nat->nat_ptr;
2003 	if (ipn != NULL) {
2004 		ipn->in_space++;
2005 		ipn->in_use--;
2006 		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
2007 			if (ipn->in_apr)
2008 				appr_free(ipn->in_apr);
2009 			KFREE(ipn);
2010 			ifs->ifs_nat_stats.ns_rules--;
2011 		}
2012 	}
2013 
2014 	MUTEX_DESTROY(&nat->nat_lock);
2015 
2016 	aps_free(nat->nat_aps, ifs);
2017 	ifs->ifs_nat_stats.ns_inuse--;
2018 
2019 	/*
2020 	 * If there's a fragment table entry too for this nat entry, then
2021 	 * dereference that as well.  This is after nat_lock is released
2022 	 * because of Tru64.
2023 	 */
2024 	fr_forgetnat((void *)nat, ifs);
2025 
2026 	KFREE(nat);
2027 
2028 	return (0);
2029 }
2030 
2031 
2032 /* ------------------------------------------------------------------------ */
2033 /* Function:    nat_clearlist                                               */
2034 /* Returns:     int - number of NAT/RDR rules deleted                       */
2035 /* Parameters:  Nil                                                         */
2036 /*                                                                          */
2037 /* Delete all rules in the current list of rules.  There is nothing elegant */
2038 /* about this cleanup: simply free all entries on the list of rules and     */
2039 /* clear out the tables used for hashed NAT rule lookups.                   */
2040 /* ------------------------------------------------------------------------ */
nat_clearlist(ifs)2041 static int nat_clearlist(ifs)
2042 ipf_stack_t *ifs;
2043 {
2044 	ipnat_t *n, **np = &ifs->ifs_nat_list;
2045 	int i = 0;
2046 
2047 	if (ifs->ifs_nat_rules != NULL)
2048 		bzero((char *)ifs->ifs_nat_rules,
2049 		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2050 	if (ifs->ifs_rdr_rules != NULL)
2051 		bzero((char *)ifs->ifs_rdr_rules,
2052 		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2053 
2054 	while ((n = *np) != NULL) {
2055 		*np = n->in_next;
2056 		if (n->in_use == 0) {
2057 			if (n->in_apr != NULL)
2058 				appr_free(n->in_apr);
2059 			KFREE(n);
2060 			ifs->ifs_nat_stats.ns_rules--;
2061 		} else {
2062 			n->in_flags |= IPN_DELETE;
2063 			n->in_next = NULL;
2064 		}
2065 		i++;
2066 	}
2067 	ifs->ifs_nat_masks = 0;
2068 	ifs->ifs_rdr_masks = 0;
2069 	for (i = 0; i < 4; i++) {
2070 		ifs->ifs_nat6_masks[i] = 0;
2071 		ifs->ifs_rdr6_masks[i] = 0;
2072 	}
2073 	return i;
2074 }
2075 
2076 
2077 /* ------------------------------------------------------------------------ */
2078 /* Function:    nat_newmap                                                  */
2079 /* Returns:     int - -1 == error, 0 == success                             */
2080 /* Parameters:  fin(I) - pointer to packet information                      */
2081 /*              nat(I) - pointer to NAT entry                               */
2082 /*              ni(I)  - pointer to structure with misc. information needed */
2083 /*                       to create new NAT entry.                           */
2084 /*                                                                          */
2085 /* Given an empty NAT structure, populate it with new information about a   */
2086 /* new NAT session, as defined by the matching NAT rule.                    */
2087 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2088 /* to the new IP address for the translation.                               */
2089 /* ------------------------------------------------------------------------ */
nat_newmap(fin,nat,ni)2090 static INLINE int nat_newmap(fin, nat, ni)
2091 fr_info_t *fin;
2092 nat_t *nat;
2093 natinfo_t *ni;
2094 {
2095 	u_short st_port, dport, sport, port, sp, dp;
2096 	struct in_addr in, inb;
2097 	hostmap_t *hm;
2098 	u_32_t flags;
2099 	u_32_t st_ip;
2100 	ipnat_t *np;
2101 	nat_t *natl;
2102 	int l;
2103 	ipf_stack_t *ifs = fin->fin_ifs;
2104 
2105 	/*
2106 	 * If it's an outbound packet which doesn't match any existing
2107 	 * record, then create a new port
2108 	 */
2109 	l = 0;
2110 	hm = NULL;
2111 	np = ni->nai_np;
2112 	st_ip = np->in_nip;
2113 	st_port = np->in_pnext;
2114 	flags = ni->nai_flags;
2115 	sport = ni->nai_sport;
2116 	dport = ni->nai_dport;
2117 
2118 	/*
2119 	 * Do a loop until we either run out of entries to try or we find
2120 	 * a NAT mapping that isn't currently being used.  This is done
2121 	 * because the change to the source is not (usually) being fixed.
2122 	 */
2123 	do {
2124 		port = 0;
2125 		in.s_addr = htonl(np->in_nip);
2126 		if (l == 0) {
2127 			/*
2128 			 * Check to see if there is an existing NAT
2129 			 * setup for this IP address pair.
2130 			 */
2131 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2132 					 in, 0, ifs);
2133 			if (hm != NULL)
2134 				in.s_addr = hm->hm_mapip.s_addr;
2135 		} else if ((l == 1) && (hm != NULL)) {
2136 			fr_hostmapdel(&hm);
2137 		}
2138 		in.s_addr = ntohl(in.s_addr);
2139 
2140 		nat->nat_hm = hm;
2141 
2142 		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
2143 			if (l > 0)
2144 				return -1;
2145 		}
2146 
2147 		if (np->in_redir == NAT_BIMAP &&
2148 		    np->in_inmsk == np->in_outmsk) {
2149 			/*
2150 			 * map the address block in a 1:1 fashion
2151 			 */
2152 			in.s_addr = np->in_outip;
2153 			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2154 			in.s_addr = ntohl(in.s_addr);
2155 
2156 		} else if (np->in_redir & NAT_MAPBLK) {
2157 			if ((l >= np->in_ppip) || ((l > 0) &&
2158 			     !(flags & IPN_TCPUDP)))
2159 				return -1;
2160 			/*
2161 			 * map-block - Calculate destination address.
2162 			 */
2163 			in.s_addr = ntohl(fin->fin_saddr);
2164 			in.s_addr &= ntohl(~np->in_inmsk);
2165 			inb.s_addr = in.s_addr;
2166 			in.s_addr /= np->in_ippip;
2167 			in.s_addr &= ntohl(~np->in_outmsk);
2168 			in.s_addr += ntohl(np->in_outip);
2169 			/*
2170 			 * Calculate destination port.
2171 			 */
2172 			if ((flags & IPN_TCPUDP) &&
2173 			    (np->in_ppip != 0)) {
2174 				port = ntohs(sport) + l;
2175 				port %= np->in_ppip;
2176 				port += np->in_ppip *
2177 					(inb.s_addr % np->in_ippip);
2178 				port += MAPBLK_MINPORT;
2179 				port = htons(port);
2180 			}
2181 
2182 		} else if ((np->in_outip == 0) &&
2183 			   (np->in_outmsk == 0xffffffff)) {
2184 			/*
2185 			 * 0/32 - use the interface's IP address.
2186 			 */
2187 			if ((l > 0) ||
2188 			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2189 				       &in, NULL, fin->fin_ifs) == -1)
2190 				return -1;
2191 			in.s_addr = ntohl(in.s_addr);
2192 
2193 		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2194 			/*
2195 			 * 0/0 - use the original source address/port.
2196 			 */
2197 			if (l > 0)
2198 				return -1;
2199 			in.s_addr = ntohl(fin->fin_saddr);
2200 
2201 		} else if ((np->in_outmsk != 0xffffffff) &&
2202 			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2203 			np->in_nip++;
2204 
2205 		natl = NULL;
2206 
2207 		if ((flags & IPN_TCPUDP) &&
2208 		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2209 		    (np->in_flags & IPN_AUTOPORTMAP)) {
2210 			/*
2211 			 * "ports auto" (without map-block)
2212 			 */
2213 			if ((l > 0) && (l % np->in_ppip == 0)) {
2214 				if (l > np->in_space) {
2215 					return -1;
2216 				} else if ((l > np->in_ppip) &&
2217 					   np->in_outmsk != 0xffffffff)
2218 					np->in_nip++;
2219 			}
2220 			if (np->in_ppip != 0) {
2221 				port = ntohs(sport);
2222 				port += (l % np->in_ppip);
2223 				port %= np->in_ppip;
2224 				port += np->in_ppip *
2225 					(ntohl(fin->fin_saddr) %
2226 					 np->in_ippip);
2227 				port += MAPBLK_MINPORT;
2228 				port = htons(port);
2229 			}
2230 
2231 		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2232 			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2233 			/*
2234 			 * Standard port translation.  Select next port.
2235 			 */
2236 			if (np->in_flags & IPN_SEQUENTIAL) {
2237 				port = np->in_pnext;
2238 			} else {
2239 				port = ipf_random() % (ntohs(np->in_pmax) -
2240 						       ntohs(np->in_pmin) + 1);
2241 				port += ntohs(np->in_pmin);
2242 			}
2243 			port = htons(port);
2244 			np->in_pnext++;
2245 
2246 			if (np->in_pnext > ntohs(np->in_pmax)) {
2247 				np->in_pnext = ntohs(np->in_pmin);
2248 				if (np->in_outmsk != 0xffffffff)
2249 					np->in_nip++;
2250 			}
2251 		}
2252 
2253 		if (np->in_flags & IPN_IPRANGE) {
2254 			if (np->in_nip > ntohl(np->in_outmsk))
2255 				np->in_nip = ntohl(np->in_outip);
2256 		} else {
2257 			if ((np->in_outmsk != 0xffffffff) &&
2258 			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2259 			    ntohl(np->in_outip))
2260 				np->in_nip = ntohl(np->in_outip) + 1;
2261 		}
2262 
2263 		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2264 			port = sport;
2265 
2266 		/*
2267 		 * Here we do a lookup of the connection as seen from
2268 		 * the outside.  If an IP# pair already exists, try
2269 		 * again.  So if you have A->B becomes C->B, you can
2270 		 * also have D->E become C->E but not D->B causing
2271 		 * another C->B.  Also take protocol and ports into
2272 		 * account when determining whether a pre-existing
2273 		 * NAT setup will cause an external conflict where
2274 		 * this is appropriate.
2275 		 */
2276 		inb.s_addr = htonl(in.s_addr);
2277 		sp = fin->fin_data[0];
2278 		dp = fin->fin_data[1];
2279 		fin->fin_data[0] = fin->fin_data[1];
2280 		fin->fin_data[1] = htons(port);
2281 		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2282 				    (u_int)fin->fin_p, fin->fin_dst, inb);
2283 		fin->fin_data[0] = sp;
2284 		fin->fin_data[1] = dp;
2285 
2286 		/*
2287 		 * Has the search wrapped around and come back to the
2288 		 * start ?
2289 		 */
2290 		if ((natl != NULL) &&
2291 		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2292 		    (np->in_nip != 0) && (st_ip == np->in_nip))
2293 			return -1;
2294 		l++;
2295 	} while (natl != NULL);
2296 
2297 	if (np->in_space > 0)
2298 		np->in_space--;
2299 
2300 	/* Setup the NAT table */
2301 	nat->nat_inip = fin->fin_src;
2302 	nat->nat_outip.s_addr = htonl(in.s_addr);
2303 	nat->nat_oip = fin->fin_dst;
2304 	if (nat->nat_hm == NULL)
2305 		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2306 					  nat->nat_outip, 0, ifs);
2307 
2308 	if (flags & IPN_TCPUDP) {
2309 		nat->nat_inport = sport;
2310 		nat->nat_outport = port;	/* sport */
2311 		nat->nat_oport = dport;
2312 		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2313 	} else if (flags & IPN_ICMPQUERY) {
2314 		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2315 		nat->nat_inport = port;
2316 		nat->nat_outport = port;
2317 	}
2318 
2319 	ni->nai_ip.s_addr = in.s_addr;
2320 	ni->nai_port = port;
2321 	ni->nai_nport = dport;
2322 	return 0;
2323 }
2324 
2325 
2326 /* ------------------------------------------------------------------------ */
2327 /* Function:    nat_newrdr                                                  */
2328 /* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2329 /*                    allow rule to be moved if IPN_ROUNDR is set.          */
2330 /* Parameters:  fin(I) - pointer to packet information                      */
2331 /*              nat(I) - pointer to NAT entry                               */
2332 /*              ni(I)  - pointer to structure with misc. information needed */
2333 /*                       to create new NAT entry.                           */
2334 /*                                                                          */
2335 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2336 /* to the new IP address for the translation.                               */
2337 /* ------------------------------------------------------------------------ */
nat_newrdr(fin,nat,ni)2338 static INLINE int nat_newrdr(fin, nat, ni)
2339 fr_info_t *fin;
2340 nat_t *nat;
2341 natinfo_t *ni;
2342 {
2343 	u_short nport, dport, sport;
2344 	struct in_addr in, inb;
2345 	u_short sp, dp;
2346 	hostmap_t *hm;
2347 	u_32_t flags;
2348 	ipnat_t *np;
2349 	nat_t *natl;
2350 	int move;
2351 	ipf_stack_t *ifs = fin->fin_ifs;
2352 
2353 	move = 1;
2354 	hm = NULL;
2355 	in.s_addr = 0;
2356 	np = ni->nai_np;
2357 	flags = ni->nai_flags;
2358 	sport = ni->nai_sport;
2359 	dport = ni->nai_dport;
2360 
2361 	/*
2362 	 * If the matching rule has IPN_STICKY set, then we want to have the
2363 	 * same rule kick in as before.  Why would this happen?  If you have
2364 	 * a collection of rdr rules with "round-robin sticky", the current
2365 	 * packet might match a different one to the previous connection but
2366 	 * we want the same destination to be used.
2367 	 */
2368 	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2369 	    (IPN_ROUNDR|IPN_STICKY)) {
2370 		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2371 				 (u_32_t)dport, ifs);
2372 		if (hm != NULL) {
2373 			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2374 			np = hm->hm_ipnat;
2375 			ni->nai_np = np;
2376 			move = 0;
2377 		}
2378 	}
2379 
2380 	/*
2381 	 * Otherwise, it's an inbound packet. Most likely, we don't
2382 	 * want to rewrite source ports and source addresses. Instead,
2383 	 * we want to rewrite to a fixed internal address and fixed
2384 	 * internal port.
2385 	 */
2386 	if (np->in_flags & IPN_SPLIT) {
2387 		in.s_addr = np->in_nip;
2388 
2389 		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2390 			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2391 					 in, (u_32_t)dport, ifs);
2392 			if (hm != NULL) {
2393 				in.s_addr = hm->hm_mapip.s_addr;
2394 				move = 0;
2395 			}
2396 		}
2397 
2398 		if (hm == NULL || hm->hm_ref == 1) {
2399 			if (np->in_inip == htonl(in.s_addr)) {
2400 				np->in_nip = ntohl(np->in_inmsk);
2401 				move = 0;
2402 			} else {
2403 				np->in_nip = ntohl(np->in_inip);
2404 			}
2405 		}
2406 
2407 	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2408 		/*
2409 		 * 0/32 - use the interface's IP address.
2410 		 */
2411 		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2412 			   fin->fin_ifs) == -1)
2413 			return -1;
2414 		in.s_addr = ntohl(in.s_addr);
2415 
2416 	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2417 		/*
2418 		 * 0/0 - use the original destination address/port.
2419 		 */
2420 		in.s_addr = ntohl(fin->fin_daddr);
2421 
2422 	} else if (np->in_redir == NAT_BIMAP &&
2423 		   np->in_inmsk == np->in_outmsk) {
2424 		/*
2425 		 * map the address block in a 1:1 fashion
2426 		 */
2427 		in.s_addr = np->in_inip;
2428 		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2429 		in.s_addr = ntohl(in.s_addr);
2430 	} else {
2431 		in.s_addr = ntohl(np->in_inip);
2432 	}
2433 
2434 	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2435 		nport = dport;
2436 	else {
2437 		/*
2438 		 * Whilst not optimized for the case where
2439 		 * pmin == pmax, the gain is not significant.
2440 		 */
2441 		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2442 		    (np->in_pmin != np->in_pmax)) {
2443 			nport = ntohs(dport) - ntohs(np->in_pmin) +
2444 				ntohs(np->in_pnext);
2445 			nport = htons(nport);
2446 		} else
2447 			nport = np->in_pnext;
2448 	}
2449 
2450 	/*
2451 	 * When the redirect-to address is set to 0.0.0.0, just
2452 	 * assume a blank `forwarding' of the packet.  We don't
2453 	 * setup any translation for this either.
2454 	 */
2455 	if (in.s_addr == 0) {
2456 		if (nport == dport)
2457 			return -1;
2458 		in.s_addr = ntohl(fin->fin_daddr);
2459 	}
2460 
2461 	/*
2462 	 * Check to see if this redirect mapping already exists and if
2463 	 * it does, return "failure" (allowing it to be created will just
2464 	 * cause one or both of these "connections" to stop working.)
2465 	 */
2466 	inb.s_addr = htonl(in.s_addr);
2467 	sp = fin->fin_data[0];
2468 	dp = fin->fin_data[1];
2469 	fin->fin_data[1] = fin->fin_data[0];
2470 	fin->fin_data[0] = ntohs(nport);
2471 	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2472 		    (u_int)fin->fin_p, inb, fin->fin_src);
2473 	fin->fin_data[0] = sp;
2474 	fin->fin_data[1] = dp;
2475 	if (natl != NULL)
2476 		return (-1);
2477 
2478 	nat->nat_inip.s_addr = htonl(in.s_addr);
2479 	nat->nat_outip = fin->fin_dst;
2480 	nat->nat_oip = fin->fin_src;
2481 
2482 	ni->nai_ip.s_addr = in.s_addr;
2483 	ni->nai_nport = nport;
2484 	ni->nai_port = sport;
2485 
2486 	if (flags & IPN_TCPUDP) {
2487 		nat->nat_inport = nport;
2488 		nat->nat_outport = dport;
2489 		nat->nat_oport = sport;
2490 		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2491 	} else if (flags & IPN_ICMPQUERY) {
2492 		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2493 		nat->nat_inport = nport;
2494 		nat->nat_outport = nport;
2495 	}
2496 
2497 	return move;
2498 }
2499 
2500 /* ------------------------------------------------------------------------ */
2501 /* Function:    nat_new                                                     */
2502 /* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2503 /*                       else pointer to new NAT structure                  */
2504 /* Parameters:  fin(I)       - pointer to packet information                */
2505 /*              np(I)        - pointer to NAT rule                          */
2506 /*              natsave(I)   - pointer to where to store NAT struct pointer */
2507 /*              flags(I)     - flags describing the current packet          */
2508 /*              direction(I) - direction of packet (in/out)                 */
2509 /* Write Lock:  ipf_nat                                                     */
2510 /*                                                                          */
2511 /* Attempts to create a new NAT entry.  Does not actually change the packet */
2512 /* in any way.                                                              */
2513 /*                                                                          */
2514 /* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2515 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2516 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2517 /* and (3) building that structure and putting it into the NAT table(s).    */
2518 /* ------------------------------------------------------------------------ */
nat_new(fin,np,natsave,flags,direction)2519 nat_t *nat_new(fin, np, natsave, flags, direction)
2520 fr_info_t *fin;
2521 ipnat_t *np;
2522 nat_t **natsave;
2523 u_int flags;
2524 int direction;
2525 {
2526 	tcphdr_t *tcp = NULL;
2527 	hostmap_t *hm = NULL;
2528 	nat_t *nat, *natl;
2529 	u_int nflags;
2530 	natinfo_t ni;
2531 	int move;
2532 	ipf_stack_t *ifs = fin->fin_ifs;
2533 
2534 	/*
2535 	 * Trigger automatic call to nat_flushtable() if the
2536 	 * table has reached capcity specified by hi watermark.
2537 	 */
2538 	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
2539 		ifs->ifs_nat_doflush = 1;
2540 
2541 	/*
2542 	 * If automatic flushing did not do its job, and the table
2543 	 * has filled up, don't try to create a new entry.
2544 	 */
2545 	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2546 		ifs->ifs_nat_stats.ns_memfail++;
2547 		return NULL;
2548 	}
2549 
2550 	move = 1;
2551 	nflags = np->in_flags & flags;
2552 	nflags &= NAT_FROMRULE;
2553 
2554 	ni.nai_np = np;
2555 	ni.nai_nflags = nflags;
2556 	ni.nai_flags = flags;
2557 
2558 	/* Give me a new nat */
2559 	KMALLOC(nat, nat_t *);
2560 	if (nat == NULL) {
2561 		ifs->ifs_nat_stats.ns_memfail++;
2562 		/*
2563 		 * Try to automatically tune the max # of entries in the
2564 		 * table allowed to be less than what will cause kmem_alloc()
2565 		 * to fail and try to eliminate panics due to out of memory
2566 		 * conditions arising.
2567 		 */
2568 		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2569 			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2570 			printf("ipf_nattable_max reduced to %d\n",
2571 				ifs->ifs_ipf_nattable_max);
2572 		}
2573 		return NULL;
2574 	}
2575 
2576 	if (flags & IPN_TCPUDP) {
2577 		tcp = fin->fin_dp;
2578 		ni.nai_sport = htons(fin->fin_sport);
2579 		ni.nai_dport = htons(fin->fin_dport);
2580 	} else if (flags & IPN_ICMPQUERY) {
2581 		/*
2582 		 * In the ICMP query NAT code, we translate the ICMP id fields
2583 		 * to make them unique. This is indepedent of the ICMP type
2584 		 * (e.g. in the unlikely event that a host sends an echo and
2585 		 * an tstamp request with the same id, both packets will have
2586 		 * their ip address/id field changed in the same way).
2587 		 */
2588 		/* The icmp_id field is used by the sender to identify the
2589 		 * process making the icmp request. (the receiver justs
2590 		 * copies it back in its response). So, it closely matches
2591 		 * the concept of source port. We overlay sport, so we can
2592 		 * maximally reuse the existing code.
2593 		 */
2594 		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2595 		ni.nai_dport = ni.nai_sport;
2596 	}
2597 
2598 	bzero((char *)nat, sizeof(*nat));
2599 	nat->nat_flags = flags;
2600 	nat->nat_redir = np->in_redir;
2601 
2602 	if ((flags & NAT_SLAVE) == 0) {
2603 		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2604 	}
2605 
2606 	/*
2607 	 * Search the current table for a match.
2608 	 */
2609 	if (direction == NAT_OUTBOUND) {
2610 		/*
2611 		 * We can now arrange to call this for the same connection
2612 		 * because ipf_nat_new doesn't protect the code path into
2613 		 * this function.
2614 		 */
2615 		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2616 				     fin->fin_src, fin->fin_dst);
2617 		if (natl != NULL) {
2618 			KFREE(nat);
2619 			nat = natl;
2620 			goto done;
2621 		}
2622 
2623 		move = nat_newmap(fin, nat, &ni);
2624 		if (move == -1)
2625 			goto badnat;
2626 
2627 		np = ni.nai_np;
2628 	} else {
2629 		/*
2630 		 * NAT_INBOUND is used only for redirects rules
2631 		 */
2632 		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2633 				    fin->fin_src, fin->fin_dst);
2634 		if (natl != NULL) {
2635 			KFREE(nat);
2636 			nat = natl;
2637 			goto done;
2638 		}
2639 
2640 		move = nat_newrdr(fin, nat, &ni);
2641 		if (move == -1)
2642 			goto badnat;
2643 
2644 		np = ni.nai_np;
2645 	}
2646 
2647 	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2648 		if (np->in_redir == NAT_REDIRECT) {
2649 			nat_delrdr(np);
2650 			nat_addrdr(np, ifs);
2651 		} else if (np->in_redir == NAT_MAP) {
2652 			nat_delnat(np);
2653 			nat_addnat(np, ifs);
2654 		}
2655 	}
2656 
2657 	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2658 		goto badnat;
2659 	}
2660 
2661 	nat_calc_chksum_diffs(nat);
2662 
2663 	if (flags & SI_WILDP)
2664 		ifs->ifs_nat_stats.ns_wilds++;
2665 	fin->fin_flx |= FI_NEWNAT;
2666 	goto done;
2667 badnat:
2668 	ifs->ifs_nat_stats.ns_badnat++;
2669 	if ((hm = nat->nat_hm) != NULL)
2670 		fr_hostmapdel(&hm);
2671 	KFREE(nat);
2672 	nat = NULL;
2673 done:
2674 	if ((flags & NAT_SLAVE) == 0) {
2675 		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2676 	}
2677 	return nat;
2678 }
2679 
2680 
2681 /* ------------------------------------------------------------------------ */
2682 /* Function:    nat_finalise                                                */
2683 /* Returns:     int - 0 == sucess, -1 == failure                            */
2684 /* Parameters:  fin(I) - pointer to packet information                      */
2685 /*              nat(I) - pointer to NAT entry                               */
2686 /*              ni(I)  - pointer to structure with misc. information needed */
2687 /*                       to create new NAT entry.                           */
2688 /* Write Lock:  ipf_nat                                                     */
2689 /*                                                                          */
2690 /* This is the tail end of constructing a new NAT entry and is the same     */
2691 /* for both IPv4 and IPv6.                                                  */
2692 /* ------------------------------------------------------------------------ */
2693 /*ARGSUSED*/
nat_finalise(fin,nat,ni,tcp,natsave,direction)2694 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2695 fr_info_t *fin;
2696 nat_t *nat;
2697 natinfo_t *ni;
2698 tcphdr_t *tcp;
2699 nat_t **natsave;
2700 int direction;
2701 {
2702 	frentry_t *fr;
2703 	ipnat_t *np;
2704 	ipf_stack_t *ifs = fin->fin_ifs;
2705 
2706 	np = ni->nai_np;
2707 
2708 	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2709 
2710 #ifdef	IPFILTER_SYNC
2711 	if ((nat->nat_flags & SI_CLONE) == 0)
2712 		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2713 #endif
2714 
2715 	nat->nat_me = natsave;
2716 	nat->nat_dir = direction;
2717 	nat->nat_ifps[0] = np->in_ifps[0];
2718 	nat->nat_ifps[1] = np->in_ifps[1];
2719 	nat->nat_ptr = np;
2720 	nat->nat_p = fin->fin_p;
2721 	nat->nat_v = fin->fin_v;
2722 	nat->nat_mssclamp = np->in_mssclamp;
2723 	fr = fin->fin_fr;
2724 	nat->nat_fr = fr;
2725 
2726 	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2727 		if (appr_new(fin, nat) == -1)
2728 			return -1;
2729 
2730 	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2731 		if (ifs->ifs_nat_logging)
2732 			nat_log(nat, (u_int)np->in_redir, ifs);
2733 		np->in_use++;
2734 		if (fr != NULL) {
2735 			MUTEX_ENTER(&fr->fr_lock);
2736 			fr->fr_ref++;
2737 			MUTEX_EXIT(&fr->fr_lock);
2738 		}
2739 		return 0;
2740 	}
2741 
2742 	/*
2743 	 * nat_insert failed, so cleanup time...
2744 	 */
2745 	return -1;
2746 }
2747 
2748 
2749 /* ------------------------------------------------------------------------ */
2750 /* Function:   nat_insert                                                   */
2751 /* Returns:    int - 0 == sucess, -1 == failure                             */
2752 /* Parameters: nat(I) - pointer to NAT structure                            */
2753 /*             rev(I) - flag indicating forward/reverse direction of packet */
2754 /* Write Lock: ipf_nat                                                      */
2755 /*                                                                          */
2756 /* Insert a NAT entry into the hash tables for searching and add it to the  */
2757 /* list of active NAT entries.  Adjust global counters when complete.       */
2758 /* ------------------------------------------------------------------------ */
nat_insert(nat,rev,ifs)2759 int	nat_insert(nat, rev, ifs)
2760 nat_t	*nat;
2761 int	rev;
2762 ipf_stack_t *ifs;
2763 {
2764 	u_int hv1, hv2;
2765 	nat_t **natp;
2766 
2767 	/*
2768 	 * Try and return an error as early as possible, so calculate the hash
2769 	 * entry numbers first and then proceed.
2770 	 */
2771 	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2772 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2773 				  0xffffffff);
2774 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2775 				  ifs->ifs_ipf_nattable_sz);
2776 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2777 				  0xffffffff);
2778 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2779 				  ifs->ifs_ipf_nattable_sz);
2780 	} else {
2781 		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2782 		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2783 				  ifs->ifs_ipf_nattable_sz);
2784 		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2785 		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2786 				  ifs->ifs_ipf_nattable_sz);
2787 	}
2788 
2789 	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2790 	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2791 		return -1;
2792 	}
2793 
2794 	nat->nat_hv[0] = hv1;
2795 	nat->nat_hv[1] = hv2;
2796 
2797 	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2798 
2799 	nat->nat_rev = rev;
2800 	nat->nat_ref = 1;
2801 	nat->nat_bytes[0] = 0;
2802 	nat->nat_pkts[0] = 0;
2803 	nat->nat_bytes[1] = 0;
2804 	nat->nat_pkts[1] = 0;
2805 
2806 	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2807 	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2808 
2809 	if (nat->nat_ifnames[1][0] !='\0') {
2810 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2811 		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2812 	} else {
2813 		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2814 			       LIFNAMSIZ);
2815 		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2816 		nat->nat_ifps[1] = nat->nat_ifps[0];
2817 	}
2818 
2819 	nat->nat_next = ifs->ifs_nat_instances;
2820 	nat->nat_pnext = &ifs->ifs_nat_instances;
2821 	if (ifs->ifs_nat_instances)
2822 		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2823 	ifs->ifs_nat_instances = nat;
2824 
2825 	natp = &ifs->ifs_nat_table[0][hv1];
2826 	if (*natp)
2827 		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2828 	nat->nat_phnext[0] = natp;
2829 	nat->nat_hnext[0] = *natp;
2830 	*natp = nat;
2831 	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2832 
2833 	natp = &ifs->ifs_nat_table[1][hv2];
2834 	if (*natp)
2835 		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2836 	nat->nat_phnext[1] = natp;
2837 	nat->nat_hnext[1] = *natp;
2838 	*natp = nat;
2839 	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2840 
2841 	fr_setnatqueue(nat, rev, ifs);
2842 
2843 	ifs->ifs_nat_stats.ns_added++;
2844 	ifs->ifs_nat_stats.ns_inuse++;
2845 	return 0;
2846 }
2847 
2848 
2849 /* ------------------------------------------------------------------------ */
2850 /* Function:    nat_icmperrorlookup                                         */
2851 /* Returns:     nat_t* - point to matching NAT structure                    */
2852 /* Parameters:  fin(I) - pointer to packet information                      */
2853 /*              dir(I) - direction of packet (in/out)                       */
2854 /*                                                                          */
2855 /* Check if the ICMP error message is related to an existing TCP, UDP or    */
2856 /* ICMP query nat entry.  It is assumed that the packet is already of the   */
2857 /* the required length.                                                     */
2858 /* ------------------------------------------------------------------------ */
nat_icmperrorlookup(fin,dir)2859 nat_t *nat_icmperrorlookup(fin, dir)
2860 fr_info_t *fin;
2861 int dir;
2862 {
2863 	int flags = 0, minlen;
2864 	icmphdr_t *orgicmp;
2865 	tcphdr_t *tcp = NULL;
2866 	u_short data[2];
2867 	nat_t *nat;
2868 	ip_t *oip;
2869 	u_int p;
2870 
2871 	/*
2872 	 * Does it at least have the return (basic) IP header ?
2873 	 * Only a basic IP header (no options) should be with an ICMP error
2874 	 * header.  Also, if it's not an error type, then return.
2875 	 */
2876 	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2877 		return NULL;
2878 
2879 	/*
2880 	 * Check packet size
2881 	 */
2882 	oip = (ip_t *)((char *)fin->fin_dp + 8);
2883 	minlen = IP_HL(oip) << 2;
2884 	if ((minlen < sizeof(ip_t)) ||
2885 	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2886 		return NULL;
2887 	/*
2888 	 * Is the buffer big enough for all of it ?  It's the size of the IP
2889 	 * header claimed in the encapsulated part which is of concern.  It
2890 	 * may be too big to be in this buffer but not so big that it's
2891 	 * outside the ICMP packet, leading to TCP deref's causing problems.
2892 	 * This is possible because we don't know how big oip_hl is when we
2893 	 * do the pullup early in fr_check() and thus can't gaurantee it is
2894 	 * all here now.
2895 	 */
2896 #ifdef  _KERNEL
2897 	{
2898 	mb_t *m;
2899 
2900 	m = fin->fin_m;
2901 # if defined(MENTAT)
2902 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2903 		return NULL;
2904 # else
2905 	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2906 	    (char *)fin->fin_ip + M_LEN(m))
2907 		return NULL;
2908 # endif
2909 	}
2910 #endif
2911 
2912 	if (fin->fin_daddr != oip->ip_src.s_addr)
2913 		return NULL;
2914 
2915 	p = oip->ip_p;
2916 	if (p == IPPROTO_TCP)
2917 		flags = IPN_TCP;
2918 	else if (p == IPPROTO_UDP)
2919 		flags = IPN_UDP;
2920 	else if (p == IPPROTO_ICMP) {
2921 		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2922 
2923 		/* see if this is related to an ICMP query */
2924 		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2925 			data[0] = fin->fin_data[0];
2926 			data[1] = fin->fin_data[1];
2927 			fin->fin_data[0] = 0;
2928 			fin->fin_data[1] = orgicmp->icmp_id;
2929 
2930 			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2931 			/*
2932 			 * NOTE : dir refers to the direction of the original
2933 			 *        ip packet. By definition the icmp error
2934 			 *        message flows in the opposite direction.
2935 			 */
2936 			if (dir == NAT_INBOUND)
2937 				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2938 						   oip->ip_src);
2939 			else
2940 				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2941 						    oip->ip_src);
2942 			fin->fin_data[0] = data[0];
2943 			fin->fin_data[1] = data[1];
2944 			return nat;
2945 		}
2946 	}
2947 
2948 	if (flags & IPN_TCPUDP) {
2949 		minlen += 8;		/* + 64bits of data to get ports */
2950 		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2951 			return NULL;
2952 
2953 		data[0] = fin->fin_data[0];
2954 		data[1] = fin->fin_data[1];
2955 		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2956 		fin->fin_data[0] = ntohs(tcp->th_dport);
2957 		fin->fin_data[1] = ntohs(tcp->th_sport);
2958 
2959 		if (dir == NAT_INBOUND) {
2960 			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2961 					   oip->ip_src);
2962 		} else {
2963 			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2964 					    oip->ip_src);
2965 		}
2966 		fin->fin_data[0] = data[0];
2967 		fin->fin_data[1] = data[1];
2968 		return nat;
2969 	}
2970 	if (dir == NAT_INBOUND)
2971 		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2972 	else
2973 		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2974 }
2975 
2976 
2977 /* ------------------------------------------------------------------------ */
2978 /* Function:    nat_icmperror                                               */
2979 /* Returns:     nat_t* - point to matching NAT structure                    */
2980 /* Parameters:  fin(I)    - pointer to packet information                   */
2981 /*              nflags(I) - NAT flags for this packet                       */
2982 /*              dir(I)    - direction of packet (in/out)                    */
2983 /*                                                                          */
2984 /* Fix up an ICMP packet which is an error message for an existing NAT      */
2985 /* session.  This will correct both packet header data and checksums.       */
2986 /*                                                                          */
2987 /* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2988 /* a NAT'd ICMP packet gets correctly recognised.                           */
2989 /* ------------------------------------------------------------------------ */
nat_icmperror(fin,nflags,dir)2990 nat_t *nat_icmperror(fin, nflags, dir)
2991 fr_info_t *fin;
2992 u_int *nflags;
2993 int dir;
2994 {
2995 	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2996 	struct in_addr in;
2997 	icmphdr_t *icmp, *orgicmp;
2998 	int dlen;
2999 	udphdr_t *udp;
3000 	tcphdr_t *tcp;
3001 	nat_t *nat;
3002 	ip_t *oip;
3003 	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
3004 		return NULL;
3005 
3006 	/*
3007 	 * nat_icmperrorlookup() looks up nat entry associated with the
3008 	 * offending IP packet and returns pointer to the entry, or NULL
3009 	 * if packet wasn't natted or for `defective' packets.
3010 	 */
3011 
3012 	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
3013 		return NULL;
3014 
3015 	sumd2 = 0;
3016 	*nflags = IPN_ICMPERR;
3017 	icmp = fin->fin_dp;
3018 	oip = (ip_t *)&icmp->icmp_ip;
3019 	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3020 	tcp = (tcphdr_t *)udp;
3021 	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3022 
3023 	/*
3024 	 * Need to adjust ICMP header to include the real IP#'s and
3025 	 * port #'s.  There are three steps required.
3026 	 *
3027 	 * Step 1
3028 	 * Fix the IP addresses in the offending IP packet and update
3029 	 * ip header checksum to compensate for the change.
3030 	 *
3031 	 * No update needed here for icmp_cksum because the ICMP checksum
3032 	 * is calculated over the complete ICMP packet, which includes the
3033 	 * changed oip IP addresses and oip->ip_sum.  These two changes
3034 	 * cancel each other out (if the delta for the IP address is x,
3035 	 * then the delta for ip_sum is minus x).
3036 	 */
3037 
3038 	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3039 		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3040 		in = nat->nat_inip;
3041 		oip->ip_src = in;
3042 	} else {
3043 		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3044 		in = nat->nat_outip;
3045 		oip->ip_dst = in;
3046 	}
3047 
3048 	sum2 = LONG_SUM(ntohl(in.s_addr));
3049 	CALC_SUMD(sum1, sum2, sumd);
3050 	fix_datacksum(&oip->ip_sum, sumd);
3051 
3052 	/*
3053 	 * Step 2
3054 	 * Perform other adjustments based on protocol of offending packet.
3055 	 */
3056 
3057 	switch (oip->ip_p) {
3058 		case IPPROTO_TCP :
3059 		case IPPROTO_UDP :
3060 
3061 			/*
3062 			* For offending TCP/UDP IP packets, translate the ports
3063 			* based on the NAT specification.
3064 			*
3065 			* Advance notice : Now it becomes complicated :-)
3066 			*
3067 			* Since the port and IP address fields are both part
3068 			* of the TCP/UDP checksum of the offending IP packet,
3069 			* we need to adjust that checksum as well.
3070 			*
3071 			* To further complicate things, the TCP/UDP checksum
3072 			* may not be present.  We must check to see if the
3073 			* length of the data portion is big enough to hold
3074 			* the checksum.  In the UDP case, a test to determine
3075 			* if the checksum is even set is also required.
3076 			*
3077 			* Any changes to an IP address, port or checksum within
3078 			* the ICMP packet requires a change to icmp_cksum.
3079 			*
3080 			* Be extremely careful here ... The change is dependent
3081 			* upon whether or not the TCP/UDP checksum is present.
3082 			*
3083 			* If TCP/UDP checksum is present, the icmp_cksum must
3084 			* compensate for checksum modification resulting from
3085 			* IP address change only.  Port change and resulting
3086 			* data checksum adjustments cancel each other out.
3087 			*
3088 			* If TCP/UDP checksum is not present, icmp_cksum must
3089 			* compensate for port change only.  The IP address
3090 			* change does not modify anything else in this case.
3091 			*/
3092 
3093 			psum1 = 0;
3094 			psum2 = 0;
3095 			psumd = 0;
3096 
3097 			if ((tcp->th_dport == nat->nat_oport) &&
3098 			    (tcp->th_sport != nat->nat_inport)) {
3099 
3100 				/*
3101 				 * Translate the source port.
3102 				 */
3103 
3104 				psum1 = ntohs(tcp->th_sport);
3105 				psum2 = ntohs(nat->nat_inport);
3106 				tcp->th_sport = nat->nat_inport;
3107 
3108 			} else if ((tcp->th_sport == nat->nat_oport) &&
3109 				    (tcp->th_dport != nat->nat_outport)) {
3110 
3111 				/*
3112 				 * Translate the destination port.
3113 				 */
3114 
3115 				psum1 = ntohs(tcp->th_dport);
3116 				psum2 = ntohs(nat->nat_outport);
3117 				tcp->th_dport = nat->nat_outport;
3118 			}
3119 
3120 			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3121 
3122 				/*
3123 				 * TCP checksum present.
3124 				 *
3125 				 * Adjust data checksum and icmp checksum to
3126 				 * compensate for any IP address change.
3127 				 */
3128 
3129 				sum1 = ntohs(tcp->th_sum);
3130 				fix_datacksum(&tcp->th_sum, sumd);
3131 				sum2 = ntohs(tcp->th_sum);
3132 				sumd2 = sumd << 1;
3133 				CALC_SUMD(sum1, sum2, sumd);
3134 				sumd2 += sumd;
3135 
3136 				/*
3137 				 * Also make data checksum adjustment to
3138 				 * compensate for any port change.
3139 				 */
3140 
3141 				if (psum1 != psum2) {
3142 					CALC_SUMD(psum1, psum2, psumd);
3143 					fix_datacksum(&tcp->th_sum, psumd);
3144 				}
3145 
3146 			} else if ((oip->ip_p == IPPROTO_UDP) &&
3147 				   (dlen >= 8) && (udp->uh_sum != 0)) {
3148 
3149 				/*
3150 				 * The UDP checksum is present and set.
3151 				 *
3152 				 * Adjust data checksum and icmp checksum to
3153 				 * compensate for any IP address change.
3154 				 */
3155 
3156 				sum1 = ntohs(udp->uh_sum);
3157 				fix_datacksum(&udp->uh_sum, sumd);
3158 				sum2 = ntohs(udp->uh_sum);
3159 				sumd2 = sumd << 1;
3160 				CALC_SUMD(sum1, sum2, sumd);
3161 				sumd2 += sumd;
3162 
3163 				/*
3164 				 * Also make data checksum adjustment to
3165 				 * compensate for any port change.
3166 				 */
3167 
3168 				if (psum1 != psum2) {
3169 					CALC_SUMD(psum1, psum2, psumd);
3170 					fix_datacksum(&udp->uh_sum, psumd);
3171 				}
3172 
3173 			} else {
3174 
3175 				/*
3176 				 * Data checksum was not present.
3177 				 *
3178 				 * Compensate for any port change.
3179 				 */
3180 
3181 				CALC_SUMD(psum2, psum1, psumd);
3182 				sumd2 += psumd;
3183 			}
3184 			break;
3185 
3186 		case IPPROTO_ICMP :
3187 
3188 			orgicmp = (icmphdr_t *)udp;
3189 
3190 			if ((nat->nat_dir == NAT_OUTBOUND) &&
3191 			    (orgicmp->icmp_id != nat->nat_inport) &&
3192 			    (dlen >= 8)) {
3193 
3194 				/*
3195 				 * Fix ICMP checksum (of the offending ICMP
3196 				 * query packet) to compensate the change
3197 				 * in the ICMP id of the offending ICMP
3198 				 * packet.
3199 				 *
3200 				 * Since you modify orgicmp->icmp_id with
3201 				 * a delta (say x) and you compensate that
3202 				 * in origicmp->icmp_cksum with a delta
3203 				 * minus x, you don't have to adjust the
3204 				 * overall icmp->icmp_cksum
3205 				 */
3206 
3207 				sum1 = ntohs(orgicmp->icmp_id);
3208 				sum2 = ntohs(nat->nat_inport);
3209 				CALC_SUMD(sum1,