1/*
2 * Copyright (C) 1995-2004 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7 * Use is subject to license terms.
8 */
9
10#if defined(KERNEL) || defined(_KERNEL)
11# undef KERNEL
12# undef _KERNEL
13# define        KERNEL	1
14# define        _KERNEL	1
15#endif
16#include <sys/errno.h>
17#include <sys/types.h>
18#include <sys/param.h>
19#include <sys/time.h>
20#include <sys/file.h>
21#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22    defined(_KERNEL)
23# include "opt_ipfilter_log.h"
24#endif
25#if !defined(_KERNEL)
26# include <stdio.h>
27# include <string.h>
28# include <stdlib.h>
29# define _KERNEL
30# ifdef __OpenBSD__
31struct file;
32# endif
33# include <sys/uio.h>
34# undef _KERNEL
35#endif
36#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
37# include <sys/filio.h>
38# include <sys/fcntl.h>
39#else
40# include <sys/ioctl.h>
41#endif
42#if !defined(AIX)
43# include <sys/fcntl.h>
44#endif
45#if !defined(linux)
46# include <sys/protosw.h>
47#endif
48#include <sys/socket.h>
49#if defined(_KERNEL)
50# include <sys/systm.h>
51# if !defined(__SVR4) && !defined(__svr4__)
52#  include <sys/mbuf.h>
53# endif
54#endif
55#if defined(__SVR4) || defined(__svr4__)
56# include <sys/filio.h>
57# include <sys/byteorder.h>
58# ifdef _KERNEL
59#  include <sys/dditypes.h>
60# endif
61# include <sys/stream.h>
62# include <sys/kmem.h>
63#endif
64#if __FreeBSD_version >= 300000
65# include <sys/queue.h>
66#endif
67#include <net/if.h>
68#if __FreeBSD_version >= 300000
69# include <net/if_var.h>
70# if defined(_KERNEL) && !defined(IPFILTER_LKM)
71#  include "opt_ipfilter.h"
72# endif
73#endif
74#ifdef sun
75# include <net/af.h>
76#endif
77#include <net/route.h>
78#include <netinet/in.h>
79#include <netinet/in_systm.h>
80#include <netinet/ip.h>
81
82#ifdef RFC1825
83# include <vpn/md5.h>
84# include <vpn/ipsec.h>
85extern struct ifnet vpnif;
86#endif
87
88#if !defined(linux)
89# include <netinet/ip_var.h>
90#endif
91#include <netinet/tcp.h>
92#include <netinet/udp.h>
93#include <netinet/ip_icmp.h>
94#include "netinet/ip_compat.h"
95#include <netinet/tcpip.h>
96#include "netinet/ip_fil.h"
97#include "netinet/ip_nat.h"
98#include "netinet/ip_frag.h"
99#include "netinet/ip_state.h"
100#include "netinet/ip_proxy.h"
101#include "netinet/ipf_stack.h"
102#ifdef	IPFILTER_SYNC
103#include "netinet/ip_sync.h"
104#endif
105#if (__FreeBSD_version >= 300000)
106# include <sys/malloc.h>
107#endif
108/* END OF INCLUDES */
109
110#undef	SOCKADDR_IN
111#define	SOCKADDR_IN	struct sockaddr_in
112
113#if !defined(lint)
114static const char sccsid[] = "@(#)ip_nat.c	1.11 6/5/96 (C) 1995 Darren Reed";
115static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
116#endif
117
118
119/* ======================================================================== */
120/* How the NAT is organised and works.                                      */
121/*                                                                          */
122/* Inside (interface y) NAT       Outside (interface x)                     */
123/* -------------------- -+- -------------------------------------           */
/* Packet going          |   out, processed by fr_checknatout() for x       */
125/* ------------>         |   ------------>                                  */
126/* src=10.1.1.1          |   src=192.1.1.1                                  */
127/*                       |                                                  */
128/*                       |   in, processed by fr_checknatin() for x         */
129/* <------------         |   <------------                                  */
130/* dst=10.1.1.1          |   dst=192.1.1.1                                  */
131/* -------------------- -+- -------------------------------------           */
132/* fr_checknatout() - changes ip_src and if required, sport                 */
133/*             - creates a new mapping, if required.                        */
134/* fr_checknatin()  - changes ip_dst and if required, dport                 */
135/*                                                                          */
136/* In the NAT table, internal source is recorded as "in" and externally     */
137/* seen as "out".                                                           */
138/* ======================================================================== */
139
140
141static	int	nat_clearlist __P((ipf_stack_t *));
142static	void	nat_addnat __P((struct ipnat *, ipf_stack_t *));
143static	void	nat_addrdr __P((struct ipnat *, ipf_stack_t *));
144static	int	fr_natgetent __P((caddr_t, ipf_stack_t *));
145static	int	fr_natgetsz __P((caddr_t, ipf_stack_t *));
146static	int	fr_natputent __P((caddr_t, int, ipf_stack_t *));
147static	void	nat_tabmove __P((nat_t *, ipf_stack_t *));
148static	int	nat_match __P((fr_info_t *, ipnat_t *));
149static	INLINE	int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
150static	INLINE	int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
151static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
152				    struct in_addr, struct in_addr, u_32_t,
153				    ipf_stack_t *));
154static	INLINE	int nat_icmpquerytype4 __P((int));
155static	int	nat_ruleaddrinit __P((ipnat_t *));
156static	int	nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
157static	void	nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
158static	INLINE	int nat_icmperrortype4 __P((int));
159static	INLINE	int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
160				      tcphdr_t *, nat_t **, int));
161static	INLINE	int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
162static	void	nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
163static	int	nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
164static	int	nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
165static	int	nat_flushtable __P((int, ipf_stack_t *));
166
167#define NAT_HAS_L4_CHANGED(n)	\
168 	(((n)->nat_flags & (IPN_TCPUDPICMP)) && \
169 	(n)->nat_inport != (n)->nat_outport)
170
171
172/* ------------------------------------------------------------------------ */
173/* Function:    fr_natinit                                                  */
174/* Returns:     int - 0 == success, -1 == failure                           */
175/* Parameters:  Nil                                                         */
176/*                                                                          */
177/* Initialise all of the NAT locks, tables and other structures.            */
178/* ------------------------------------------------------------------------ */
179int fr_natinit(ifs)
180ipf_stack_t *ifs;
181{
182	int i;
183
184	KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
185		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
186	if (ifs->ifs_nat_table[0] != NULL)
187		bzero((char *)ifs->ifs_nat_table[0],
188		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
189	else
190		return -1;
191
192	KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
193		 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
194	if (ifs->ifs_nat_table[1] != NULL)
195		bzero((char *)ifs->ifs_nat_table[1],
196		      ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
197	else
198		return -2;
199
200	KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
201		 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
202	if (ifs->ifs_nat_rules != NULL)
203		bzero((char *)ifs->ifs_nat_rules,
204		      ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
205	else
206		return -3;
207
208	KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
209		 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
210	if (ifs->ifs_rdr_rules != NULL)
211		bzero((char *)ifs->ifs_rdr_rules,
212		      ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
213	else
214		return -4;
215
216	KMALLOCS(ifs->ifs_maptable, hostmap_t **,
217		 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
218	if (ifs->ifs_maptable != NULL)
219		bzero((char *)ifs->ifs_maptable,
220		      sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
221	else
222		return -5;
223
224	ifs->ifs_ipf_hm_maplist = NULL;
225
226	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
227		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
228	if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
229		return -1;
230	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
231	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
232
233	KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
234		 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
235	if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
236		return -1;
237	bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
238	      ifs->ifs_ipf_nattable_sz * sizeof(u_long));
239
240	if (ifs->ifs_fr_nat_maxbucket == 0) {
241		for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
242			ifs->ifs_fr_nat_maxbucket++;
243		ifs->ifs_fr_nat_maxbucket *= 2;
244	}
245
246	fr_sttab_init(ifs->ifs_nat_tqb, ifs);
247	/*
248	 * Increase this because we may have "keep state" following this too
249	 * and packet storms can occur if this is removed too quickly.
250	 */
251	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
252	ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
253	ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
254	ifs->ifs_nat_udptq.ifq_ref = 1;
255	ifs->ifs_nat_udptq.ifq_head = NULL;
256	ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
257	MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
258	ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
259	ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
260	ifs->ifs_nat_icmptq.ifq_ref = 1;
261	ifs->ifs_nat_icmptq.ifq_head = NULL;
262	ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
263	MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
264	ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
265	ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
266	ifs->ifs_nat_iptq.ifq_ref = 1;
267	ifs->ifs_nat_iptq.ifq_head = NULL;
268	ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
269	MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
270	ifs->ifs_nat_iptq.ifq_next = NULL;
271
272	for (i = 0; i < IPF_TCP_NSTATES; i++) {
273		if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
274			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
275#ifdef LARGE_NAT
276		else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
277			ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
278#endif
279	}
280
281	/*
282	 * Increase this because we may have "keep state" following
283	 * this too and packet storms can occur if this is removed
284	 * too quickly.
285	 */
286	ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
287	    ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
288
289	RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
290	RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
291	MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
292	MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
293
294	ifs->ifs_fr_nat_init = 1;
295	ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
296	return 0;
297}
298
299
300/* ------------------------------------------------------------------------ */
301/* Function:    nat_addrdr                                                  */
302/* Returns:     Nil                                                         */
303/* Parameters:  n(I) - pointer to NAT rule to add                           */
304/*                                                                          */
305/* Adds a redirect rule to the hash table of redirect rules and the list of */
306/* loaded NAT rules.  Updates the bitmask indicating which netmasks are in  */
307/* use by redirect rules.                                                   */
308/* ------------------------------------------------------------------------ */
309static void nat_addrdr(n, ifs)
310ipnat_t *n;
311ipf_stack_t *ifs;
312{
313	ipnat_t **np;
314	u_32_t j;
315	u_int hv;
316	int k;
317
318	k = count4bits(n->in_outmsk);
319	if ((k >= 0) && (k != 32))
320		ifs->ifs_rdr_masks |= 1 << k;
321	j = (n->in_outip & n->in_outmsk);
322	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
323	np = ifs->ifs_rdr_rules + hv;
324	while (*np != NULL)
325		np = &(*np)->in_rnext;
326	n->in_rnext = NULL;
327	n->in_prnext = np;
328	n->in_hv = hv;
329	*np = n;
330}
331
332
333/* ------------------------------------------------------------------------ */
334/* Function:    nat_addnat                                                  */
335/* Returns:     Nil                                                         */
336/* Parameters:  n(I) - pointer to NAT rule to add                           */
337/*                                                                          */
338/* Adds a NAT map rule to the hash table of rules and the list of  loaded   */
339/* NAT rules.  Updates the bitmask indicating which netmasks are in use by  */
340/* redirect rules.                                                          */
341/* ------------------------------------------------------------------------ */
342static void nat_addnat(n, ifs)
343ipnat_t *n;
344ipf_stack_t *ifs;
345{
346	ipnat_t **np;
347	u_32_t j;
348	u_int hv;
349	int k;
350
351	k = count4bits(n->in_inmsk);
352	if ((k >= 0) && (k != 32))
353		ifs->ifs_nat_masks |= 1 << k;
354	j = (n->in_inip & n->in_inmsk);
355	hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
356	np = ifs->ifs_nat_rules + hv;
357	while (*np != NULL)
358		np = &(*np)->in_mnext;
359	n->in_mnext = NULL;
360	n->in_pmnext = np;
361	n->in_hv = hv;
362	*np = n;
363}
364
365
366/* ------------------------------------------------------------------------ */
367/* Function:    nat_delrdr                                                  */
368/* Returns:     Nil                                                         */
369/* Parameters:  n(I) - pointer to NAT rule to delete                        */
370/*                                                                          */
371/* Removes a redirect rule from the hash table of redirect rules.           */
372/* ------------------------------------------------------------------------ */
373void nat_delrdr(n)
374ipnat_t *n;
375{
376	if (n->in_rnext)
377		n->in_rnext->in_prnext = n->in_prnext;
378	*n->in_prnext = n->in_rnext;
379}
380
381
382/* ------------------------------------------------------------------------ */
383/* Function:    nat_delnat                                                  */
384/* Returns:     Nil                                                         */
385/* Parameters:  n(I) - pointer to NAT rule to delete                        */
386/*                                                                          */
387/* Removes a NAT map rule from the hash table of NAT map rules.             */
388/* ------------------------------------------------------------------------ */
389void nat_delnat(n)
390ipnat_t *n;
391{
392	if (n->in_mnext != NULL)
393		n->in_mnext->in_pmnext = n->in_pmnext;
394	*n->in_pmnext = n->in_mnext;
395}
396
397
398/* ------------------------------------------------------------------------ */
399/* Function:    nat_hostmap                                                 */
400/* Returns:     struct hostmap* - NULL if no hostmap could be created,      */
401/*                                else a pointer to the hostmapping to use  */
402/* Parameters:  np(I)   - pointer to NAT rule                               */
403/*              real(I) - real IP address                                   */
404/*              map(I)  - mapped IP address                                 */
405/*              port(I) - destination port number                           */
406/* Write Locks: ipf_nat                                                     */
407/*                                                                          */
408/* Check if an ip address has already been allocated for a given mapping    */
409/* that is not doing port based translation.  If is not yet allocated, then */
410/* create a new entry if a non-NULL NAT rule pointer has been supplied.     */
411/* ------------------------------------------------------------------------ */
412static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
413ipnat_t *np;
414struct in_addr src;
415struct in_addr dst;
416struct in_addr map;
417u_32_t port;
418ipf_stack_t *ifs;
419{
420	hostmap_t *hm;
421	u_int hv;
422
423	hv = (src.s_addr ^ dst.s_addr);
424	hv += src.s_addr;
425	hv += dst.s_addr;
426	hv %= HOSTMAP_SIZE;
427	for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
428		if ((hm->hm_srcip.s_addr == src.s_addr) &&
429		    (hm->hm_dstip.s_addr == dst.s_addr) &&
430		    ((np == NULL) || (np == hm->hm_ipnat)) &&
431		    ((port == 0) || (port == hm->hm_port))) {
432			hm->hm_ref++;
433			return hm;
434		}
435
436	if (np == NULL)
437		return NULL;
438
439	KMALLOC(hm, hostmap_t *);
440	if (hm) {
441		hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
442		hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
443		if (ifs->ifs_ipf_hm_maplist != NULL)
444			ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
445		ifs->ifs_ipf_hm_maplist = hm;
446
447		hm->hm_next = ifs->ifs_maptable[hv];
448		hm->hm_pnext = ifs->ifs_maptable + hv;
449		if (ifs->ifs_maptable[hv] != NULL)
450			ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
451		ifs->ifs_maptable[hv] = hm;
452		hm->hm_ipnat = np;
453		hm->hm_srcip = src;
454		hm->hm_dstip = dst;
455		hm->hm_mapip = map;
456		hm->hm_ref = 1;
457		hm->hm_port = port;
458		hm->hm_v = 4;
459	}
460	return hm;
461}
462
463
464/* ------------------------------------------------------------------------ */
465/* Function:    fr_hostmapdel                                              */
466/* Returns:     Nil                                                         */
467/* Parameters:  hmp(I) - pointer to pointer to hostmap structure            */
468/* Write Locks: ipf_nat                                                     */
469/*                                                                          */
470/* Decrement the references to this hostmap structure by one.  If this      */
471/* reaches zero then remove it and free it.                                 */
472/* ------------------------------------------------------------------------ */
473void fr_hostmapdel(hmp)
474struct hostmap **hmp;
475{
476	struct hostmap *hm;
477
478	hm = *hmp;
479	*hmp = NULL;
480
481	hm->hm_ref--;
482	if (hm->hm_ref == 0) {
483		if (hm->hm_next)
484			hm->hm_next->hm_pnext = hm->hm_pnext;
485		*hm->hm_pnext = hm->hm_next;
486		if (hm->hm_hnext)
487			hm->hm_hnext->hm_phnext = hm->hm_phnext;
488		*hm->hm_phnext = hm->hm_hnext;
489		KFREE(hm);
490	}
491}
492
493
/* ------------------------------------------------------------------------ */
/* Function:    fix_outcksum                                                */
/* Returns:     Nil                                                         */
/* Parameters:  sp(IO) - location of 16bit checksum to update (stored in    */
/*                       network byte order)                                */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going out: "n" is added    */
/* to the complement of the checksum, the carries are folded back in twice  */
/* and the complemented result is written back.  Nothing is touched when    */
/* "n" is zero.                                                             */
/* ------------------------------------------------------------------------ */
void fix_outcksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t acc;
	u_short folded;
	int pass;

	if (n != 0) {
		acc = (~ntohs(*sp)) & 0xffff;
		acc += n;

		/* Two folds are enough to absorb every carry. */
		for (pass = 0; pass < 2; pass++)
			acc = (acc & 0xffff) + (acc >> 16);

		folded = ~(u_short)acc;
		*sp = htons(folded);
	}
}
520
521
/* ------------------------------------------------------------------------ */
/* Function:    fix_incksum                                                 */
/* Returns:     Nil                                                         */
/* Parameters:  sp(IO) - location of 16bit checksum to update (stored in    */
/*                       network byte order)                                */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Adjusts the 16bit checksum by "n" for packets going in: the low 16 bits  */
/* of the complement of "n" are added to the complement of the checksum,    */
/* the carries are folded back in twice and the complemented result is      */
/* written back.  Nothing is touched when "n" is zero.                      */
/* ------------------------------------------------------------------------ */
void fix_incksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t total;
	u_short result;

	if (n == 0)
		return;

	total = (~ntohs(*sp)) & 0xffff;
	total += (~n) & 0xffff;

	/* First fold of the carry, then once more for any new carry. */
	total = (total & 0xffff) + (total >> 16);
	total = (total & 0xffff) + (total >> 16);

	result = ~(u_short)total;
	*sp = htons(result);
}
548
549
/* ------------------------------------------------------------------------ */
/* Function:    fix_datacksum                                               */
/* Returns:     Nil                                                         */
/* Parameters:  sp(IO) - location of 16bit checksum to update (stored in    */
/*                       network byte order)                                */
/*              n(I)   - amount to adjust checksum by                       */
/*                                                                          */
/* Fix_datacksum is used *only* for the adjustments of checksums in the     */
/* data section of an IP packet.                                            */
/*                                                                          */
/* The only situation in which you need to do this is when NAT'ing an       */
/* ICMP error message.  Such a message contains in its body the IP header   */
/* of the original IP packet that caused the error.                         */
/*                                                                          */
/* You can't use fix_incksum or fix_outcksum in that case, because for the  */
/* kernel the data section of the ICMP error is just data, and no special   */
/* processing like hardware cksum or ntohs processing has been done by the  */
/* kernel on the data section.                                              */
/* ------------------------------------------------------------------------ */
void fix_datacksum(sp, n)
u_short *sp;
u_32_t n;
{
	u_32_t work;
	u_short updated;
	int round;

	if (n == 0)
		return;

	work = (~ntohs(*sp)) & 0xffff;
	work += n;

	/* Fold the carry into the low 16 bits, twice. */
	round = 2;
	while (round-- > 0)
		work = (work >> 16) + (work & 0xffff);

	updated = ~(u_short)work;
	*sp = htons(updated);
}
586
587
588/* ------------------------------------------------------------------------ */
589/* Function:    fr_nat_ioctl                                                */
590/* Returns:     int - 0 == success, != 0 == failure                         */
591/* Parameters:  data(I) - pointer to ioctl data                             */
592/*              cmd(I)  - ioctl command integer                             */
593/*              mode(I) - file mode bits used with open                     */
594/*              uid(I)  - uid of caller                                     */
595/*              ctx(I)  - pointer to give the uid context                   */
596/*              ifs     - ipf stack instance                                */
597/*                                                                          */
598/* Processes an ioctl call made to operate on the IP Filter NAT device.     */
599/* ------------------------------------------------------------------------ */
600int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
601ioctlcmd_t cmd;
602caddr_t data;
603int mode, uid;
604void *ctx;
605ipf_stack_t *ifs;
606{
607	ipnat_t *nat, *nt, *n = NULL, **np = NULL;
608	int error = 0, ret, arg, getlock;
609	ipnat_t natd;
610
611#if (BSD >= 199306) && defined(_KERNEL)
612	if ((securelevel >= 2) && (mode & FWRITE))
613		return EPERM;
614#endif
615
616#if defined(__osf__) && defined(_KERNEL)
617	getlock = 0;
618#else
619	getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
620#endif
621
622	nat = NULL;     /* XXX gcc -Wuninitialized */
623	if (cmd == (ioctlcmd_t)SIOCADNAT) {
624		KMALLOC(nt, ipnat_t *);
625	} else {
626		nt = NULL;
627	}
628
629	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
630		if (mode & NAT_SYSSPACE) {
631			bcopy(data, (char *)&natd, sizeof(natd));
632			error = 0;
633		} else {
634			error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
635		}
636
637	}
638
639	if (error != 0)
640		goto done;
641
642	/*
643	 * For add/delete, look to see if the NAT entry is already present
644	 */
645	if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
646		nat = &natd;
647		if (nat->in_v == 0)	/* For backward compat. */
648			nat->in_v = 4;
649		nat->in_flags &= IPN_USERFLAGS;
650		if ((nat->in_redir & NAT_MAPBLK) == 0) {
651			if ((nat->in_flags & IPN_SPLIT) == 0)
652				nat->in_inip &= nat->in_inmsk;
653			if ((nat->in_flags & IPN_IPRANGE) == 0)
654				nat->in_outip &= nat->in_outmsk;
655		}
656		MUTEX_ENTER(&ifs->ifs_ipf_natio);
657		for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
658		     np = &n->in_next)
659			if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
660			    IPN_CMPSIZ) == 0) {
661				if (nat->in_redir == NAT_REDIRECT &&
662				    nat->in_pnext != n->in_pnext)
663					continue;
664				break;
665			}
666	}
667
668	switch (cmd)
669	{
670	case SIOCGENITER :
671	    {
672		ipfgeniter_t iter;
673		ipftoken_t *token;
674
675		error = fr_inobj(data, &iter, IPFOBJ_GENITER);
676		if (error != 0)
677			break;
678
679		token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
680		if (token != NULL)
681			error  = nat_iterator(token, &iter, ifs);
682		else
683			error = ESRCH;
684		RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
685		break;
686	    }
687#ifdef  IPFILTER_LOG
688	case SIOCIPFFB :
689	{
690		int tmp;
691
692		if (!(mode & FWRITE))
693			error = EPERM;
694		else {
695			tmp = ipflog_clear(IPL_LOGNAT, ifs);
696			error = BCOPYOUT((char *)&tmp, (char *)data,
697					sizeof(tmp));
698			if (error != 0)
699				error = EFAULT;
700		}
701		break;
702	}
703	case SIOCSETLG :
704		if (!(mode & FWRITE)) {
705			error = EPERM;
706		} else {
707			error = BCOPYIN((char *)data,
708					(char *)&ifs->ifs_nat_logging,
709					sizeof(ifs->ifs_nat_logging));
710			if (error != 0)
711				error = EFAULT;
712		}
713		break;
714	case SIOCGETLG :
715		error = BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
716				sizeof(ifs->ifs_nat_logging));
717		if (error != 0)
718			error = EFAULT;
719		break;
720	case FIONREAD :
721		arg = ifs->ifs_iplused[IPL_LOGNAT];
722		error = BCOPYOUT(&arg, data, sizeof(arg));
723		if (error != 0)
724			error = EFAULT;
725		break;
726#endif
727	case SIOCADNAT :
728		if (!(mode & FWRITE)) {
729			error = EPERM;
730		} else if (n != NULL) {
731			error = EEXIST;
732		} else if (nt == NULL) {
733			error = ENOMEM;
734		}
735		if (error != 0) {
736			MUTEX_EXIT(&ifs->ifs_ipf_natio);
737			break;
738		}
739		bcopy((char *)nat, (char *)nt, sizeof(*n));
740		error = nat_siocaddnat(nt, np, getlock, ifs);
741		MUTEX_EXIT(&ifs->ifs_ipf_natio);
742		if (error == 0)
743			nt = NULL;
744		break;
745	case SIOCRMNAT :
746		if (!(mode & FWRITE)) {
747			error = EPERM;
748			n = NULL;
749		} else if (n == NULL) {
750			error = ESRCH;
751		}
752
753		if (error != 0) {
754			MUTEX_EXIT(&ifs->ifs_ipf_natio);
755			break;
756		}
757		nat_siocdelnat(n, np, getlock, ifs);
758
759		MUTEX_EXIT(&ifs->ifs_ipf_natio);
760		n = NULL;
761		break;
762	case SIOCGNATS :
763		ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
764		ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
765		ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
766		ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
767		ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
768		ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
769		ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
770		ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
771		ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
772		ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
773		ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
774		ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
775		error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
776		break;
777	case SIOCGNATL :
778	    {
779		natlookup_t nl;
780
781		if (getlock) {
782			READ_ENTER(&ifs->ifs_ipf_nat);
783		}
784		error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
785		if (nl.nl_v != 6)
786			nl.nl_v = 4;
787		if (error == 0) {
788			void *ptr;
789
790			switch (nl.nl_v)
791			{
792			case 4:
793				ptr = nat_lookupredir(&nl, ifs);
794				break;
795#ifdef	USE_INET6
796			case 6:
797				ptr = nat6_lookupredir(&nl, ifs);
798				break;
799#endif
800			default:
801				ptr = NULL;
802				break;
803			}
804
805			if (ptr != NULL) {
806				error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
807			} else {
808				error = ESRCH;
809			}
810		}
811		if (getlock) {
812			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
813		}
814		break;
815	    }
816	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
817		if (!(mode & FWRITE)) {
818			error = EPERM;
819			break;
820		}
821		if (getlock) {
822			WRITE_ENTER(&ifs->ifs_ipf_nat);
823		}
824		error = BCOPYIN(data, &arg, sizeof(arg));
825		if (error != 0) {
826			error = EFAULT;
827		} else {
828			if (arg == FLUSH_LIST)
829				ret = nat_clearlist(ifs);
830			else if (VALID_TABLE_FLUSH_OPT(arg))
831				ret = nat_flushtable(arg, ifs);
832			else
833				error = EINVAL;
834		}
835		if (getlock) {
836			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
837		}
838		if (error == 0) {
839			error = BCOPYOUT(&ret, data, sizeof(ret));
840			if (error != 0)
841				error = EFAULT;
842		}
843		break;
844	case SIOCPROXY :
845		error = appr_ioctl(data, cmd, mode, ifs);
846		break;
847	case SIOCSTLCK :
848		if (!(mode & FWRITE)) {
849			error = EPERM;
850		} else {
851			error = fr_lock(data, &ifs->ifs_fr_nat_lock);
852		}
853		break;
854	case SIOCSTPUT :
855		if ((mode & FWRITE) != 0) {
856			error = fr_natputent(data, getlock, ifs);
857		} else {
858			error = EACCES;
859		}
860		break;
861	case SIOCSTGSZ :
862		if (ifs->ifs_fr_nat_lock) {
863			if (getlock) {
864				READ_ENTER(&ifs->ifs_ipf_nat);
865			}
866			error = fr_natgetsz(data, ifs);
867			if (getlock) {
868				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
869			}
870		} else
871			error = EACCES;
872		break;
873	case SIOCSTGET :
874		if (ifs->ifs_fr_nat_lock) {
875			if (getlock) {
876				READ_ENTER(&ifs->ifs_ipf_nat);
877			}
878			error = fr_natgetent(data, ifs);
879			if (getlock) {
880				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
881			}
882		} else
883			error = EACCES;
884		break;
885	case SIOCIPFDELTOK :
886		error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
887		if (error != 0) {
888			error = EFAULT;
889		} else {
890			error = ipf_deltoken(arg, uid, ctx, ifs);
891		}
892		break;
893	default :
894		error = EINVAL;
895		break;
896	}
897done:
898	if (nt)
899		KFREE(nt);
900	return error;
901}
902
903
904/* ------------------------------------------------------------------------ */
905/* Function:    nat_siocaddnat                                              */
906/* Returns:     int - 0 == success, != 0 == failure                         */
907/* Parameters:  n(I)       - pointer to new NAT rule                        */
908/*              np(I)      - pointer to where to insert new NAT rule        */
909/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
910/* Mutex Locks: ipf_natio                                                   */
911/*                                                                          */
912/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
913/* from information passed to the kernel, then add it  to the appropriate   */
914/* NAT rule table(s).                                                       */
915/* ------------------------------------------------------------------------ */
916static int nat_siocaddnat(n, np, getlock, ifs)
917ipnat_t *n, **np;
918int getlock;
919ipf_stack_t *ifs;
920{
921	int error = 0, i, j;
922
923	if (nat_resolverule(n, ifs) != 0)
924		return ENOENT;
925
926	if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
927		return EINVAL;
928
929	n->in_use = 0;
930	if (n->in_redir & NAT_MAPBLK)
931		n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
932	else if (n->in_flags & IPN_AUTOPORTMAP)
933		n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
934	else if (n->in_flags & IPN_IPRANGE)
935		n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
936	else if (n->in_flags & IPN_SPLIT)
937		n->in_space = 2;
938	else if (n->in_outmsk != 0)
939		n->in_space = ~ntohl(n->in_outmsk);
940	else
941		n->in_space = 1;
942	if ((n->in_flags & NAT_TCPUDPICMPQ) && (n->in_redir != NAT_REDIRECT)) {
943		if (ntohs(n->in_pmax) < ntohs(n->in_pmin))
944			return EINVAL;
945	}
946
947	/*
948	 * Calculate the number of valid IP addresses in the output
949	 * mapping range.  In all cases, the range is inclusive of
950	 * the start and ending IP addresses.
951	 * If to a CIDR address, lose 2: broadcast + network address
952	 *                               (so subtract 1)
953	 * If to a range, add one.
954	 * If to a single IP address, set to 1.
955	 */
956	if (n->in_space) {
957		if ((n->in_flags & IPN_IPRANGE) != 0)
958			n->in_space += 1;
959		else
960			n->in_space -= 1;
961	} else
962		n->in_space = 1;
963
964#ifdef	USE_INET6
965	if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
966	    !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
967		IP6_ADD(&n->in_out[0], 1, &n->in_next6)
968	else if (n->in_v == 6 &&
969	    (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
970		n->in_next6 = n->in_in[0];
971	else if (n->in_v == 6)
972		n->in_next6 = n->in_out[0];
973	else
974#endif
975	if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
976	    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
977		n->in_nip = ntohl(n->in_outip) + 1;
978	else if ((n->in_flags & IPN_SPLIT) &&
979		 (n->in_redir & NAT_REDIRECT))
980		n->in_nip = ntohl(n->in_inip);
981	else
982		n->in_nip = ntohl(n->in_outip);
983
984	if (n->in_redir & NAT_MAP) {
985		n->in_pnext = ntohs(n->in_pmin);
986		/*
987		 * Multiply by the number of ports made available.
988		 */
989		if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
990			n->in_space *= (ntohs(n->in_pmax) -
991					ntohs(n->in_pmin) + 1);
992			/*
993			 * Because two different sources can map to
994			 * different destinations but use the same
995			 * local IP#/port #.
996			 * If the result is smaller than in_space, then
997			 * we may have wrapped around 32bits.
998			 */
999			i = n->in_inmsk;
1000			if ((i != 0) && (i != 0xffffffff)) {
1001				j = n->in_space * (~ntohl(i) + 1);
1002				if (j >= n->in_space)
1003					n->in_space = j;
1004				else
1005					n->in_space = 0xffffffff;
1006			}
1007		}
1008		/*
1009		 * If no protocol is specified, multiple by 256 to allow for
1010		 * at least one IP:IP mapping per protocol.
1011		 */
1012		if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1013				j = n->in_space * 256;
1014				if (j >= n->in_space)
1015					n->in_space = j;
1016				else
1017					n->in_space = 0xffffffff;
1018		}
1019	}
1020
1021	/* Otherwise, these fields are preset */
1022
1023	if (getlock) {
1024		WRITE_ENTER(&ifs->ifs_ipf_nat);
1025	}
1026	n->in_next = NULL;
1027	*np = n;
1028
1029	if (n->in_age[0] != 0)
1030	    n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1031						  n->in_age[0], ifs);
1032
1033	if (n->in_age[1] != 0)
1034	    n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1035						  n->in_age[1], ifs);
1036
1037	if (n->in_redir & NAT_REDIRECT) {
1038		n->in_flags &= ~IPN_NOTDST;
1039		switch (n->in_v)
1040		{
1041		case 4 :
1042			nat_addrdr(n, ifs);
1043			break;
1044#ifdef	USE_INET6
1045		case 6 :
1046			nat6_addrdr(n, ifs);
1047			break;
1048#endif
1049		default :
1050			break;
1051		}
1052	}
1053	if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1054		n->in_flags &= ~IPN_NOTSRC;
1055		switch (n->in_v)
1056		{
1057		case 4 :
1058			nat_addnat(n, ifs);
1059			break;
1060#ifdef	USE_INET6
1061		case 6 :
1062			nat6_addnat(n, ifs);
1063			break;
1064#endif
1065		default :
1066			break;
1067		}
1068	}
1069	n = NULL;
1070	ifs->ifs_nat_stats.ns_rules++;
1071	if (getlock) {
1072		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* WRITE */
1073	}
1074
1075	return error;
1076}
1077
1078
1079/* ------------------------------------------------------------------------ */
1080/* Function:    nat_resolvrule                                              */
1081/* Returns:     int - 0 == success, -1 == failure                           */
1082/* Parameters:  n(I)  - pointer to NAT rule                                 */
1083/*                                                                          */
1084/* Resolve some of the details inside the NAT rule.  Includes resolving	    */
1085/* any specified interfaces and proxy labels, and determines whether or not */
1086/* all proxy labels are correctly specified.				    */
1087/*									    */
1088/* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT).     */
1089/* ------------------------------------------------------------------------ */
1090static int nat_resolverule(n, ifs)
1091ipnat_t *n;
1092ipf_stack_t *ifs;
1093{
1094	n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1095	n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1096
1097	n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1098	if (n->in_ifnames[1][0] == '\0') {
1099		(void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1100		n->in_ifps[1] = n->in_ifps[0];
1101	} else {
1102		n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1103	}
1104
1105	if (n->in_plabel[0] != '\0') {
1106		n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1107		if (n->in_apr == NULL)
1108			return -1;
1109	}
1110	return 0;
1111}
1112
1113
1114/* ------------------------------------------------------------------------ */
1115/* Function:    nat_siocdelnat                                              */
1116/* Returns:     int - 0 == success, != 0 == failure                         */
1117/* Parameters:  n(I)       - pointer to new NAT rule                        */
1118/*              np(I)      - pointer to where to insert new NAT rule        */
1119/*              getlock(I) - flag indicating if lock on ipf_nat is held     */
1120/* Mutex Locks: ipf_natio                                                   */
1121/*                                                                          */
1122/* Handle SIOCADNAT.  Resolve and calculate details inside the NAT rule     */
1123/* from information passed to the kernel, then add it  to the appropriate   */
1124/* NAT rule table(s).                                                       */
1125/* ------------------------------------------------------------------------ */
1126static void nat_siocdelnat(n, np, getlock, ifs)
1127ipnat_t *n, **np;
1128int getlock;
1129ipf_stack_t *ifs;
1130{
1131	int i;
1132
1133	if (getlock) {
1134		WRITE_ENTER(&ifs->ifs_ipf_nat);
1135	}
1136	if (n->in_redir & NAT_REDIRECT)
1137		nat_delrdr(n);
1138	if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1139		nat_delnat(n);
1140	if (ifs->ifs_nat_list == NULL) {
1141		ifs->ifs_nat_masks = 0;
1142		ifs->ifs_rdr_masks = 0;
1143		for (i = 0; i < 4; i++) {
1144			ifs->ifs_nat6_masks[i] = 0;
1145			ifs->ifs_rdr6_masks[i] = 0;
1146		}
1147	}
1148
1149	if (n->in_tqehead[0] != NULL) {
1150		if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1151			fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1152		}
1153	}
1154
1155	if (n->in_tqehead[1] != NULL) {
1156		if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1157			fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1158		}
1159	}
1160
1161	*np = n->in_next;
1162
1163	if (n->in_use == 0) {
1164		if (n->in_apr)
1165			appr_free(n->in_apr);
1166		KFREE(n);
1167		ifs->ifs_nat_stats.ns_rules--;
1168	} else {
1169		n->in_flags |= IPN_DELETE;
1170		n->in_next = NULL;
1171	}
1172	if (getlock) {
1173		RWLOCK_EXIT(&ifs->ifs_ipf_nat);			/* READ/WRITE */
1174	}
1175}
1176
1177
1178/* ------------------------------------------------------------------------ */
1179/* Function:    fr_natgetsz                                                 */
1180/* Returns:     int - 0 == success, != 0 is the error value.                */
1181/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1182/*                        get the size of.                                  */
1183/*                                                                          */
1184/* Handle SIOCSTGSZ.                                                        */
1185/* Return the size of the nat list entry to be copied back to user space.   */
1186/* The size of the entry is stored in the ng_sz field and the enture natget */
1187/* structure is copied back to the user.                                    */
1188/* ------------------------------------------------------------------------ */
1189static int fr_natgetsz(data, ifs)
1190caddr_t data;
1191ipf_stack_t *ifs;
1192{
1193	ap_session_t *aps;
1194	nat_t *nat, *n;
1195	natget_t ng;
1196	int err;
1197
1198	err = BCOPYIN(data, &ng, sizeof(ng));
1199	if (err != 0)
1200		return EFAULT;
1201
1202	nat = ng.ng_ptr;
1203	if (!nat) {
1204		nat = ifs->ifs_nat_instances;
1205		ng.ng_sz = 0;
1206		/*
1207		 * Empty list so the size returned is 0.  Simple.
1208		 */
1209		if (nat == NULL) {
1210			err = BCOPYOUT(&ng, data, sizeof(ng));
1211			if (err != 0) {
1212				return EFAULT;
1213			} else {
1214				return 0;
1215			}
1216		}
1217	} else {
1218		/*
1219		 * Make sure the pointer we're copying from exists in the
1220		 * current list of entries.  Security precaution to prevent
1221		 * copying of random kernel data.
1222		 */
1223		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1224			if (n == nat)
1225				break;
1226		if (!n)
1227			return ESRCH;
1228	}
1229
1230	/*
1231	 * Incluse any space required for proxy data structures.
1232	 */
1233	ng.ng_sz = sizeof(nat_save_t);
1234	aps = nat->nat_aps;
1235	if (aps != NULL) {
1236		ng.ng_sz += sizeof(ap_session_t) - 4;
1237		if (aps->aps_data != 0)
1238			ng.ng_sz += aps->aps_psiz;
1239	}
1240
1241	err = BCOPYOUT(&ng, data, sizeof(ng));
1242	if (err != 0)
1243		return EFAULT;
1244	return 0;
1245}
1246
1247
1248/* ------------------------------------------------------------------------ */
1249/* Function:    fr_natgetent                                                */
1250/* Returns:     int - 0 == success, != 0 is the error value.                */
1251/* Parameters:  data(I) - pointer to natget structure with kernel pointer   */
1252/*                        to NAT structure to copy out.                     */
1253/*                                                                          */
1254/* Handle SIOCSTGET.                                                        */
1255/* Copies out NAT entry to user space.  Any additional data held for a      */
1256/* proxy is also copied, as to is the NAT rule which was responsible for it */
1257/* ------------------------------------------------------------------------ */
1258static int fr_natgetent(data, ifs)
1259caddr_t data;
1260ipf_stack_t *ifs;
1261{
1262	int error, outsize;
1263	ap_session_t *aps;
1264	nat_save_t *ipn, ipns;
1265	nat_t *n, *nat;
1266
1267	error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1268	if (error != 0)
1269		return error;
1270
1271	if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1272		return EINVAL;
1273
1274	KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1275	if (ipn == NULL)
1276		return ENOMEM;
1277
1278	ipn->ipn_dsize = ipns.ipn_dsize;
1279	nat = ipns.ipn_next;
1280	if (nat == NULL) {
1281		nat = ifs->ifs_nat_instances;
1282		if (nat == NULL) {
1283			if (ifs->ifs_nat_instances == NULL)
1284				error = ENOENT;
1285			goto finished;
1286		}
1287	} else {
1288		/*
1289		 * Make sure the pointer we're copying from exists in the
1290		 * current list of entries.  Security precaution to prevent
1291		 * copying of random kernel data.
1292		 */
1293		for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1294			if (n == nat)
1295				break;
1296		if (n == NULL) {
1297			error = ESRCH;
1298			goto finished;
1299		}
1300	}
1301	ipn->ipn_next = nat->nat_next;
1302
1303	/*
1304	 * Copy the NAT structure.
1305	 */
1306	bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1307
1308	/*
1309	 * If we have a pointer to the NAT rule it belongs to, save that too.
1310	 */
1311	if (nat->nat_ptr != NULL)
1312		bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1313		      sizeof(ipn->ipn_ipnat));
1314
1315	/*
1316	 * If we also know the NAT entry has an associated filter rule,
1317	 * save that too.
1318	 */
1319	if (nat->nat_fr != NULL)
1320		bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1321		      sizeof(ipn->ipn_fr));
1322
1323	/*
1324	 * Last but not least, if there is an application proxy session set
1325	 * up for this NAT entry, then copy that out too, including any
1326	 * private data saved along side it by the proxy.
1327	 */
1328	aps = nat->nat_aps;
1329	outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1330	if (aps != NULL) {
1331		char *s;
1332
1333		if (outsize < sizeof(*aps)) {
1334			error = ENOBUFS;
1335			goto finished;
1336		}
1337
1338		s = ipn->ipn_data;
1339		bcopy((char *)aps, s, sizeof(*aps));
1340		s += sizeof(*aps);
1341		outsize -= sizeof(*aps);
1342		if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1343			bcopy(aps->aps_data, s, aps->aps_psiz);
1344		else
1345			error = ENOBUFS;
1346	}
1347	if (error == 0) {
1348		error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1349	}
1350
1351finished:
1352	if (ipn != NULL) {
1353		KFREES(ipn, ipns.ipn_dsize);
1354	}
1355	return error;
1356}
1357
1358/* ------------------------------------------------------------------------ */
1359/* Function:    nat_calc_chksum_diffs					    */
1360/* Returns:     void							    */
1361/* Parameters:  nat	-	pointer to NAT table entry		    */
1362/*                                                                          */
1363/* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1364/* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when     */
1365/* we are dealing with partial chksum offload. For these cases we need to   */
1366/* compute a 'partial chksum delta'. The 'partial chksum delta'is stored    */
1367/* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in 	    */
1368/* nat_sumd[0]. 							    */
1369/*									    */
1370/* The function accepts initialized NAT table entry and computes the deltas */
1371/* from nat_inip/nat_outip members. The function is called right before	    */
1372/* the new entry is inserted into the table.				    */
1373/*									    */
1374/* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum   */
1375/* of delta between original and new IP addresses.			    */
1376/*									    */
1377/* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as  */
1378/* a chkusm of delta between original an new IP addrress:port tupples.	    */
1379/*									    */
1380/* Some facts about chksum, we should remember:				    */
1381/*	IP header chksum covers IP header only				    */
1382/*									    */
1383/*	TCP/UDP chksum covers data payload and so called pseudo header	    */
1384/*		SRC, DST IP address					    */
1385/*		SRC, DST Port						    */
1386/*		length of payload					    */
1387/*									    */
1388/* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16	    */
1389/* member of dblk_t structure. The db_ckusm16 member is not part of 	    */
1390/* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1391/* chksum offload capacbility for every inbound packet. The db_cksum16 is   */
1392/* stored along with other IP packet data in dblk_t structure and used in   */
1393/* for IP/UDP/TCP chksum validation later in ip.c. 			    */
1394/*									    */
1395/* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1396/* of delta between new and orig address. NOTE: the order of operands for   */
1397/* partial delta operation is swapped compared to computing the IP/TCP/UDP  */
1398/* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c).  */
1399/*									    */
1400/* ------------------------------------------------------------------------ */
1401void nat_calc_chksum_diffs(nat)
1402nat_t *nat;
1403{
1404	u_32_t	sum_orig = 0;
1405	u_32_t	sum_changed = 0;
1406	u_32_t	sumd;
1407	u_32_t	ipsum_orig = 0;
1408	u_32_t	ipsum_changed = 0;
1409
1410	if (nat->nat_v != 4 && nat->nat_v != 6)
1411		return;
1412
1413	/*
1414	 * the switch calculates operands for CALC_SUMD(),
1415	 * which will compute the partial chksum delta.
1416	 */
1417	switch (nat->nat_dir)
1418	{
1419	case NAT_INBOUND:
1420		/*
1421		 * we are dealing with RDR rule (DST address gets
1422		 * modified on packet from client)
1423		 */
1424		if (nat->nat_v == 4) {
1425			sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1426			sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1427		} else {
1428			sum_changed = LONG_SUM6(&nat->nat_inip6);
1429			sum_orig = LONG_SUM6(&nat->nat_outip6);
1430		}
1431		break;
1432	case NAT_OUTBOUND:
1433		/*
1434		 * we are dealing with MAP rule (SRC address gets
1435		 * modified on packet from client)
1436		 */
1437		if (nat->nat_v == 4) {
1438			sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1439			sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1440		} else {
1441			sum_changed = LONG_SUM6(&nat->nat_outip6);
1442			sum_orig = LONG_SUM6(&nat->nat_inip6);
1443		}
1444		break;
1445	default: ;
1446		break;
1447	}
1448
1449	/*
1450	 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1451	 * calculation, which happens at the end of function.
1452	 */
1453	ipsum_changed = sum_changed;
1454	ipsum_orig = sum_orig;
1455	/*
1456	 * NOTE: the order of operands for partial chksum adjustment
1457	 * computation has to be swapped!
1458	 */
1459	CALC_SUMD(sum_changed, sum_orig, sumd);
1460	nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1461
1462	if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1463
1464		/*
1465		 * switch calculates operands for CALC_SUMD(), which will
1466		 * compute the full chksum delta.
1467		 */
1468		switch (nat->nat_dir)
1469		{
1470		case NAT_INBOUND:
1471			if (nat->nat_v == 4) {
1472				sum_changed = LONG_SUM(
1473				    ntohl(nat->nat_inip.s_addr) +
1474				    ntohs(nat->nat_inport));
1475				sum_orig = LONG_SUM(
1476				    ntohl(nat->nat_outip.s_addr) +
1477				    ntohs(nat->nat_outport));
1478			} else {
1479				sum_changed = LONG_SUM6(&nat->nat_inip6) +
1480				    ntohs(nat->nat_inport);
1481				sum_orig = LONG_SUM6(&nat->nat_outip6) +
1482				    ntohs(nat->nat_outport);
1483			}
1484			break;
1485		case NAT_OUTBOUND:
1486			if (nat->nat_v == 4) {
1487				sum_changed = LONG_SUM(
1488				    ntohl(nat->nat_outip.s_addr) +
1489				    ntohs(nat->nat_outport));
1490				sum_orig = LONG_SUM(
1491				    ntohl(nat->nat_inip.s_addr) +
1492				    ntohs(nat->nat_inport));
1493			} else {
1494				sum_changed = LONG_SUM6(&nat->nat_outip6) +
1495				    ntohs(nat->nat_outport);
1496				sum_orig = LONG_SUM6(&nat->nat_inip6) +
1497				    ntohs(nat->nat_inport);
1498			}
1499			break;
1500		default: ;
1501			break;
1502		}
1503
1504		CALC_SUMD(sum_orig, sum_changed, sumd);
1505		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1506
1507		if (!(nat->nat_flags & IPN_TCPUDP)) {
1508			/*
1509			 * partial HW chksum offload works for TCP/UDP headers only,
1510			 * so we need to enforce full chksum adjustment for ICMP
1511			 */
1512			nat->nat_sumd[1] = nat->nat_sumd[0];
1513		}
1514	}
1515	else
1516		nat->nat_sumd[0] = nat->nat_sumd[1];
1517
1518	/*
1519	 * we may reuse the already computed nat_sumd[0] for IP header chksum
1520	 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1521	 */
1522	if (nat->nat_v == 4) {
1523		if (NAT_HAS_L4_CHANGED(nat)) {
1524			/*
1525			 * bad luck, NAT changes also the L4 header, use IP
1526			 * addresses to compute chksum adjustment for IP header.
1527			 */
1528			CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1529			nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1530		} else {
1531			/*
1532			 * the NAT does not change L4 hdr -> reuse chksum
1533			 * adjustment for IP hdr.
1534			 */
1535			nat->nat_ipsumd = nat->nat_sumd[0];
1536
1537			/*
1538			 * if L4 header does not use chksum - zero out deltas
1539			 */
1540			if (!(nat->nat_flags & IPN_TCPUDP)) {
1541				nat->nat_sumd[0] = 0;
1542				nat->nat_sumd[1] = 0;
1543			}
1544		}
1545	}
1546
1547	return;
1548}
1549
1550/* ------------------------------------------------------------------------ */
1551/* Function:    fr_natputent                                                */
1552/* Returns:     int - 0 == success, != 0 is the error value.                */
1553/* Parameters:  data(I)    - pointer to natget structure with NAT           */
1554/*                           structure information to load into the kernel  */
1555/*              getlock(I) - flag indicating whether or not a write lock    */
1556/*                           on ipf_nat is already held.                    */
1557/*              ifs        - ipf stack instance                             */
1558/*                                                                          */
1559/* Handle SIOCSTPUT.                                                        */
1560/* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1561/* firewall rule data structures, if pointers to them indicate so.          */
1562/* ------------------------------------------------------------------------ */
1563static int fr_natputent(data, getlock, ifs)
1564caddr_t data;
1565int getlock;
1566ipf_stack_t *ifs;
1567{
1568	nat_save_t ipn, *ipnn;
1569	ap_session_t *aps;
1570	nat_t *n, *nat;
1571	frentry_t *fr;
1572	fr_info_t fin;
1573	ipnat_t *in;
1574	int error;
1575
1576	error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1577	if (error != 0)
1578		return error;
1579
1580	/*
1581	 * Trigger automatic call to nat_flushtable() if the
1582	 * table has reached capcity specified by hi watermark.
1583	 */
1584	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
1585		ifs->ifs_nat_doflush = 1;
1586
1587	/*
1588	 * If automatic flushing did not do its job, and the table
1589	 * has filled up, don't try to create a new entry.
1590	 */
1591	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
1592		ifs->ifs_nat_stats.ns_memfail++;
1593		return ENOMEM;
1594	}
1595
1596	/*
1597	 * Initialise early because of code at junkput label.
1598	 */
1599	in = NULL;
1600	aps = NULL;
1601	nat = NULL;
1602	ipnn = NULL;
1603
1604	/*
1605	 * New entry, copy in the rest of the NAT entry if it's size is more
1606	 * than just the nat_t structure.
1607	 */
1608	fr = NULL;
1609	if (ipn.ipn_dsize > sizeof(ipn)) {
1610		if (ipn.ipn_dsize > 81920) {
1611			error = ENOMEM;
1612			goto junkput;
1613		}
1614
1615		KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1616		if (ipnn == NULL)
1617			return ENOMEM;
1618
1619		error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1620		if (error != 0) {
1621			error = EFAULT;
1622			goto junkput;
1623		}
1624	} else
1625		ipnn = &ipn;
1626
1627	KMALLOC(nat, nat_t *);
1628	if (nat == NULL) {
1629		error = ENOMEM;
1630		goto junkput;
1631	}
1632
1633	bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1634	/*
1635	 * Initialize all these so that nat_delete() doesn't cause a crash.
1636	 */
1637	bzero((char *)nat, offsetof(struct nat, nat_tqe));
1638	nat->nat_tqe.tqe_pnext = NULL;
1639	nat->nat_tqe.tqe_next = NULL;
1640	nat->nat_tqe.tqe_ifq = NULL;
1641	nat->nat_tqe.tqe_parent = nat;
1642
1643	/*
1644	 * Restore the rule associated with this nat session
1645	 */
1646	in = ipnn->ipn_nat.nat_ptr;
1647	if (in != NULL) {
1648		KMALLOC(in, ipnat_t *);
1649		nat->nat_ptr = in;
1650		if (in == NULL) {
1651			error = ENOMEM;
1652			goto junkput;
1653		}
1654		bzero((char *)in, offsetof(struct ipnat, in_next6));
1655		bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1656		in->in_use = 1;
1657		in->in_flags |= IPN_DELETE;
1658
1659		ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1660
1661		if (nat_resolverule(in, ifs) != 0) {
1662			error = ESRCH;
1663			goto junkput;
1664		}
1665	}
1666
1667	/*
1668	 * Check that the NAT entry doesn't already exist in the kernel.
1669	 */
1670	if (nat->nat_v != 6)
1671		nat->nat_v = 4;
1672	bzero((char *)&fin, sizeof(fin));
1673	fin.fin_p = nat->nat_p;
1674	fin.fin_ifs = ifs;
1675	if (nat->nat_dir == NAT_OUTBOUND) {
1676		fin.fin_data[0] = ntohs(nat->nat_oport);
1677		fin.fin_data[1] = ntohs(nat->nat_outport);
1678		fin.fin_ifp = nat->nat_ifps[0];
1679		if (getlock) {
1680			READ_ENTER(&ifs->ifs_ipf_nat);
1681		}
1682
1683		switch (nat->nat_v)
1684		{
1685		case 4:
1686			fin.fin_v = nat->nat_v;
1687			n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1688			    nat->nat_oip, nat->nat_outip);
1689			break;
1690#ifdef USE_INET6
1691		case 6:
1692			n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1693			    &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1694			break;
1695#endif
1696		default:
1697			n = NULL;
1698			break;
1699		}
1700
1701		if (getlock) {
1702			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1703		}
1704		if (n != NULL) {
1705			error = EEXIST;
1706			goto junkput;
1707		}
1708	} else if (nat->nat_dir == NAT_INBOUND) {
1709		fin.fin_data[0] = ntohs(nat->nat_inport);
1710		fin.fin_data[1] = ntohs(nat->nat_oport);
1711		fin.fin_ifp = nat->nat_ifps[1];
1712		if (getlock) {
1713			READ_ENTER(&ifs->ifs_ipf_nat);
1714		}
1715
1716		switch (nat->nat_v)
1717		{
1718		case 4:
1719			n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1720			    nat->nat_inip, nat->nat_oip);
1721			break;
1722#ifdef USE_INET6
1723		case 6:
1724			n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1725			    &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1726			break;
1727#endif
1728		default:
1729			n = NULL;
1730			break;
1731		}
1732
1733		if (getlock) {
1734			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1735		}
1736		if (n != NULL) {
1737			error = EEXIST;
1738			goto junkput;
1739		}
1740	} else {
1741		error = EINVAL;
1742		goto junkput;
1743	}
1744
1745	/*
1746	 * Restore ap_session_t structure.  Include the private data allocated
1747	 * if it was there.
1748	 */
1749	aps = nat->nat_aps;
1750	if (aps != NULL) {
1751		KMALLOC(aps, ap_session_t *);
1752		nat->nat_aps = aps;
1753		if (aps == NULL) {
1754			error = ENOMEM;
1755			goto junkput;
1756		}
1757		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1758		if (in != NULL)
1759			aps->aps_apr = in->in_apr;
1760		else
1761			aps->aps_apr = NULL;
1762		if (aps->aps_psiz != 0) {
1763			if (aps->aps_psiz > 81920) {
1764				error = ENOMEM;
1765				goto junkput;
1766			}
1767			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1768			if (aps->aps_data == NULL) {
1769				error = ENOMEM;
1770				goto junkput;
1771			}
1772			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1773			      aps->aps_psiz);
1774		} else {
1775			aps->aps_psiz = 0;
1776			aps->aps_data = NULL;
1777		}
1778	}
1779
1780	/*
1781	 * If there was a filtering rule associated with this entry then
1782	 * build up a new one.
1783	 */
1784	fr = nat->nat_fr;
1785	if (fr != NULL) {
1786		if ((nat->nat_flags & SI_NEWFR) != 0) {
1787			KMALLOC(fr, frentry_t *);
1788			nat->nat_fr = fr;
1789			if (fr == NULL) {
1790				error = ENOMEM;
1791				goto junkput;
1792			}
1793			ipnn->ipn_nat.nat_fr = fr;
1794			(void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1795			bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1796
1797			fr->fr_ref = 1;
1798			fr->fr_dsize = 0;
1799			fr->fr_data = NULL;
1800			fr->fr_type = FR_T_NONE;
1801
1802			MUTEX_NUKE(&fr->fr_lock);
1803			MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1804		} else {
1805			if (getlock) {
1806				READ_ENTER(&ifs->ifs_ipf_nat);
1807			}
1808			for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1809				if (n->nat_fr == fr)
1810					break;
1811
1812			if (n != NULL) {
1813				MUTEX_ENTER(&fr->fr_lock);
1814				fr->fr_ref++;
1815				MUTEX_EXIT(&fr->fr_lock);
1816			}
1817			if (getlock) {
1818				RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1819			}
1820			if (!n) {
1821				error = ESRCH;
1822				goto junkput;
1823			}
1824		}
1825	}
1826
1827	if (ipnn != &ipn) {
1828		KFREES(ipnn, ipn.ipn_dsize);
1829		ipnn = NULL;
1830	}
1831
1832	nat_calc_chksum_diffs(nat);
1833
1834	if (getlock) {
1835		WRITE_ENTER(&ifs->ifs_ipf_nat);
1836	}
1837
1838	nat_calc_chksum_diffs(nat);
1839
1840	switch (nat->nat_v)
1841	{
1842	case 4 :
1843		error = nat_insert(nat, nat->nat_rev, ifs);
1844		break;
1845#ifdef USE_INET6
1846	case 6 :
1847		error = nat6_insert(nat, nat->nat_rev, ifs);
1848		break;
1849#endif
1850	default :
1851		break;
1852	}
1853
1854	if ((error == 0) && (aps != NULL)) {
1855		aps->aps_next = ifs->ifs_ap_sess_list;
1856		ifs->ifs_ap_sess_list = aps;
1857	}
1858	if (getlock) {
1859		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1860	}
1861
1862	if (error == 0)
1863		return 0;
1864
1865	error = ENOMEM;
1866
1867junkput:
1868	if (fr != NULL)
1869		(void) fr_derefrule(&fr, ifs);
1870
1871	if ((ipnn != NULL) && (ipnn != &ipn)) {
1872		KFREES(ipnn, ipn.ipn_dsize);
1873	}
1874	if (nat != NULL) {
1875		if (aps != NULL) {
1876			if (aps->aps_data != NULL) {
1877				KFREES(aps->aps_data, aps->aps_psiz);
1878			}
1879			KFREE(aps);
1880		}
1881		if (in != NULL) {
1882			if (in->in_apr)
1883				appr_free(in->in_apr);
1884			KFREE(in);
1885		}
1886		KFREE(nat);
1887	}
1888	return error;
1889}
1890
1891
1892/* ------------------------------------------------------------------------ */
1893/* Function:    nat_delete                                                  */
1894/* Returns:     int	- 0 if entry deleted. Otherwise, ref count on entry */
1895/* Parameters:  nat	- pointer to the NAT entry to delete		    */
1896/*		logtype	- type of LOG record to create before deleting	    */
1897/*		ifs	- ipf stack instance				    */
1898/* Write Lock:  ipf_nat                                                     */
1899/*                                                                          */
1900/* Delete a nat entry from the various lists and table.  If NAT logging is  */
1901/* enabled then generate a NAT log record for this event.                   */
1902/* ------------------------------------------------------------------------ */
1903int nat_delete(nat, logtype, ifs)
1904struct nat *nat;
1905int logtype;
1906ipf_stack_t *ifs;
1907{
1908	struct ipnat *ipn;
1909	int removed = 0;
1910
1911	if (logtype != 0 && ifs->ifs_nat_logging != 0)
1912		nat_log(nat, logtype, ifs);
1913
1914	/*
1915	 * Start by removing the entry from the hash table of nat entries
1916	 * so it will not be "used" again.
1917	 *
1918	 * It will remain in the "list" of nat entries until all references
1919	 * have been accounted for.
1920	 */
1921	if ((nat->nat_phnext[0] != NULL) && (nat->nat_phnext[1] != NULL)) {
1922		removed = 1;
1923
1924		ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1925		ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1926
1927		*nat->nat_phnext[0] = nat->nat_hnext[0];
1928		if (nat->nat_hnext[0] != NULL) {
1929			nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1930			nat->nat_hnext[0] = NULL;
1931		}
1932		nat->nat_phnext[0] = NULL;
1933
1934		*nat->nat_phnext[1] = nat->nat_hnext[1];
1935		if (nat->nat_hnext[1] != NULL) {
1936			nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1937			nat->nat_hnext[1] = NULL;
1938		}
1939		nat->nat_phnext[1] = NULL;
1940
1941		if ((nat->nat_flags & SI_WILDP) != 0)
1942			ifs->ifs_nat_stats.ns_wilds--;
1943	}
1944
1945	/*
1946	 * Next, remove it from the timeout queue it is in.
1947	 */
1948	fr_deletequeueentry(&nat->nat_tqe);
1949
1950	if (nat->nat_me != NULL) {
1951		*nat->nat_me = NULL;
1952		nat->nat_me = NULL;
1953	}
1954
1955	MUTEX_ENTER(&nat->nat_lock);
1956 	if (nat->nat_ref > 1) {
1957		nat->nat_ref--;
1958		MUTEX_EXIT(&nat->nat_lock);
1959 		if (removed)
1960 			ifs->ifs_nat_stats.ns_orphans++;
1961		return (nat->nat_ref);
1962	}
1963	MUTEX_EXIT(&nat->nat_lock);
1964
1965	nat->nat_ref = 0;
1966
1967	/*
1968	 * If entry had already been removed,
1969	 * it means we're cleaning up an orphan.
1970	 */
1971 	if (!removed)
1972 		ifs->ifs_nat_stats.ns_orphans--;
1973
1974#ifdef	IPFILTER_SYNC
1975	if (nat->nat_sync)
1976		ipfsync_del(nat->nat_sync);
1977#endif
1978
1979	/*
1980	 * Now remove it from master list of nat table entries
1981	 */
1982	if (nat->nat_pnext != NULL) {
1983		*nat->nat_pnext = nat->nat_next;
1984		if (nat->nat_next != NULL) {
1985			nat->nat_next->nat_pnext = nat->nat_pnext;
1986			nat->nat_next = NULL;
1987		}
1988		nat->nat_pnext = NULL;
1989	}
1990
1991	if (nat->nat_fr != NULL)
1992		(void)fr_derefrule(&nat->nat_fr, ifs);
1993
1994	if (nat->nat_hm != NULL)
1995		fr_hostmapdel(&nat->nat_hm);
1996
1997	/*
1998	 * If there is an active reference from the nat entry to its parent
1999	 * rule, decrement the rule's reference count and free it too if no
2000	 * longer being used.
2001	 */
2002	ipn = nat->nat_ptr;
2003	if (ipn != NULL) {
2004		ipn->in_space++;
2005		ipn->in_use--;
2006		if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
2007			if (ipn->in_apr)
2008				appr_free(ipn->in_apr);
2009			KFREE(ipn);
2010			ifs->ifs_nat_stats.ns_rules--;
2011		}
2012	}
2013
2014	MUTEX_DESTROY(&nat->nat_lock);
2015
2016	aps_free(nat->nat_aps, ifs);
2017	ifs->ifs_nat_stats.ns_inuse--;
2018
2019	/*
2020	 * If there's a fragment table entry too for this nat entry, then
2021	 * dereference that as well.  This is after nat_lock is released
2022	 * because of Tru64.
2023	 */
2024	fr_forgetnat((void *)nat, ifs);
2025
2026	KFREE(nat);
2027
2028	return (0);
2029}
2030
2031
2032/* ------------------------------------------------------------------------ */
2033/* Function:    nat_clearlist                                               */
2034/* Returns:     int - number of NAT/RDR rules deleted                       */
2035/* Parameters:  Nil                                                         */
2036/*                                                                          */
2037/* Delete all rules in the current list of rules.  There is nothing elegant */
2038/* about this cleanup: simply free all entries on the list of rules and     */
2039/* clear out the tables used for hashed NAT rule lookups.                   */
2040/* ------------------------------------------------------------------------ */
2041static int nat_clearlist(ifs)
2042ipf_stack_t *ifs;
2043{
2044	ipnat_t *n, **np = &ifs->ifs_nat_list;
2045	int i = 0;
2046
2047	if (ifs->ifs_nat_rules != NULL)
2048		bzero((char *)ifs->ifs_nat_rules,
2049		      sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2050	if (ifs->ifs_rdr_rules != NULL)
2051		bzero((char *)ifs->ifs_rdr_rules,
2052		      sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2053
2054	while ((n = *np) != NULL) {
2055		*np = n->in_next;
2056		if (n->in_use == 0) {
2057			if (n->in_apr != NULL)
2058				appr_free(n->in_apr);
2059			KFREE(n);
2060			ifs->ifs_nat_stats.ns_rules--;
2061		} else {
2062			n->in_flags |= IPN_DELETE;
2063			n->in_next = NULL;
2064		}
2065		i++;
2066	}
2067	ifs->ifs_nat_masks = 0;
2068	ifs->ifs_rdr_masks = 0;
2069	for (i = 0; i < 4; i++) {
2070		ifs->ifs_nat6_masks[i] = 0;
2071		ifs->ifs_rdr6_masks[i] = 0;
2072	}
2073	return i;
2074}
2075
2076
2077/* ------------------------------------------------------------------------ */
2078/* Function:    nat_newmap                                                  */
2079/* Returns:     int - -1 == error, 0 == success                             */
2080/* Parameters:  fin(I) - pointer to packet information                      */
2081/*              nat(I) - pointer to NAT entry                               */
2082/*              ni(I)  - pointer to structure with misc. information needed */
2083/*                       to create new NAT entry.                           */
2084/*                                                                          */
2085/* Given an empty NAT structure, populate it with new information about a   */
2086/* new NAT session, as defined by the matching NAT rule.                    */
2087/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2088/* to the new IP address for the translation.                               */
2089/* ------------------------------------------------------------------------ */
2090static INLINE int nat_newmap(fin, nat, ni)
2091fr_info_t *fin;
2092nat_t *nat;
2093natinfo_t *ni;
2094{
2095	u_short st_port, dport, sport, port, sp, dp;
2096	struct in_addr in, inb;
2097	hostmap_t *hm;
2098	u_32_t flags;
2099	u_32_t st_ip;
2100	ipnat_t *np;
2101	nat_t *natl;
2102	int l;
2103	ipf_stack_t *ifs = fin->fin_ifs;
2104
2105	/*
2106	 * If it's an outbound packet which doesn't match any existing
2107	 * record, then create a new port
2108	 */
2109	l = 0;
2110	hm = NULL;
2111	np = ni->nai_np;
2112	st_ip = np->in_nip;
2113	st_port = np->in_pnext;
2114	flags = ni->nai_flags;
2115	sport = ni->nai_sport;
2116	dport = ni->nai_dport;
2117
2118	/*
2119	 * Do a loop until we either run out of entries to try or we find
2120	 * a NAT mapping that isn't currently being used.  This is done
2121	 * because the change to the source is not (usually) being fixed.
2122	 */
2123	do {
2124		port = 0;
2125		in.s_addr = htonl(np->in_nip);
2126		if (l == 0) {
2127			/*
2128			 * Check to see if there is an existing NAT
2129			 * setup for this IP address pair.
2130			 */
2131			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2132					 in, 0, ifs);
2133			if (hm != NULL)
2134				in.s_addr = hm->hm_mapip.s_addr;
2135		} else if ((l == 1) && (hm != NULL)) {
2136			fr_hostmapdel(&hm);
2137		}
2138		in.s_addr = ntohl(in.s_addr);
2139
2140		nat->nat_hm = hm;
2141
2142		if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
2143			if (l > 0)
2144				return -1;
2145		}
2146
2147		if (np->in_redir == NAT_BIMAP &&
2148		    np->in_inmsk == np->in_outmsk) {
2149			/*
2150			 * map the address block in a 1:1 fashion
2151			 */
2152			in.s_addr = np->in_outip;
2153			in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2154			in.s_addr = ntohl(in.s_addr);
2155
2156		} else if (np->in_redir & NAT_MAPBLK) {
2157			if ((l >= np->in_ppip) || ((l > 0) &&
2158			     !(flags & IPN_TCPUDP)))
2159				return -1;
2160			/*
2161			 * map-block - Calculate destination address.
2162			 */
2163			in.s_addr = ntohl(fin->fin_saddr);
2164			in.s_addr &= ntohl(~np->in_inmsk);
2165			inb.s_addr = in.s_addr;
2166			in.s_addr /= np->in_ippip;
2167			in.s_addr &= ntohl(~np->in_outmsk);
2168			in.s_addr += ntohl(np->in_outip);
2169			/*
2170			 * Calculate destination port.
2171			 */
2172			if ((flags & IPN_TCPUDP) &&
2173			    (np->in_ppip != 0)) {
2174				port = ntohs(sport) + l;
2175				port %= np->in_ppip;
2176				port += np->in_ppip *
2177					(inb.s_addr % np->in_ippip);
2178				port += MAPBLK_MINPORT;
2179				port = htons(port);
2180			}
2181
2182		} else if ((np->in_outip == 0) &&
2183			   (np->in_outmsk == 0xffffffff)) {
2184			/*
2185			 * 0/32 - use the interface's IP address.
2186			 */
2187			if ((l > 0) ||
2188			    fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2189				       &in, NULL, fin->fin_ifs) == -1)
2190				return -1;
2191			in.s_addr = ntohl(in.s_addr);
2192
2193		} else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2194			/*
2195			 * 0/0 - use the original source address/port.
2196			 */
2197			if (l > 0)
2198				return -1;
2199			in.s_addr = ntohl(fin->fin_saddr);
2200
2201		} else if ((np->in_outmsk != 0xffffffff) &&
2202			   (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2203			np->in_nip++;
2204
2205		natl = NULL;
2206
2207		if ((flags & IPN_TCPUDP) &&
2208		    ((np->in_redir & NAT_MAPBLK) == 0) &&
2209		    (np->in_flags & IPN_AUTOPORTMAP)) {
2210			/*
2211			 * "ports auto" (without map-block)
2212			 */
2213			if ((l > 0) && (l % np->in_ppip == 0)) {
2214				if (l > np->in_space) {
2215					return -1;
2216				} else if ((l > np->in_ppip) &&
2217					   np->in_outmsk != 0xffffffff)
2218					np->in_nip++;
2219			}
2220			if (np->in_ppip != 0) {
2221				port = ntohs(sport);
2222				port += (l % np->in_ppip);
2223				port %= np->in_ppip;
2224				port += np->in_ppip *
2225					(ntohl(fin->fin_saddr) %
2226					 np->in_ippip);
2227				port += MAPBLK_MINPORT;
2228				port = htons(port);
2229			}
2230
2231		} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2232			   (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2233			/*
2234			 * Standard port translation.  Select next port.
2235			 */
2236			if (np->in_flags & IPN_SEQUENTIAL) {
2237				port = np->in_pnext;
2238			} else {
2239				port = ipf_random() % (ntohs(np->in_pmax) -
2240						       ntohs(np->in_pmin) + 1);
2241				port += ntohs(np->in_pmin);
2242			}
2243			port = htons(port);
2244			np->in_pnext++;
2245
2246			if (np->in_pnext > ntohs(np->in_pmax)) {
2247				np->in_pnext = ntohs(np->in_pmin);
2248				if (np->in_outmsk != 0xffffffff)
2249					np->in_nip++;
2250			}
2251		}
2252
2253		if (np->in_flags & IPN_IPRANGE) {
2254			if (np->in_nip > ntohl(np->in_outmsk))
2255				np->in_nip = ntohl(np->in_outip);
2256		} else {
2257			if ((np->in_outmsk != 0xffffffff) &&
2258			    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2259			    ntohl(np->in_outip))
2260				np->in_nip = ntohl(np->in_outip) + 1;
2261		}
2262
2263		if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2264			port = sport;
2265
2266		/*
2267		 * Here we do a lookup of the connection as seen from
2268		 * the outside.  If an IP# pair already exists, try
2269		 * again.  So if you have A->B becomes C->B, you can
2270		 * also have D->E become C->E but not D->B causing
2271		 * another C->B.  Also take protocol and ports into
2272		 * account when determining whether a pre-existing
2273		 * NAT setup will cause an external conflict where
2274		 * this is appropriate.
2275		 */
2276		inb.s_addr = htonl(in.s_addr);
2277		sp = fin->fin_data[0];
2278		dp = fin->fin_data[1];
2279		fin->fin_data[0] = fin->fin_data[1];
2280		fin->fin_data[1] = htons(port);
2281		natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2282				    (u_int)fin->fin_p, fin->fin_dst, inb);
2283		fin->fin_data[0] = sp;
2284		fin->fin_data[1] = dp;
2285
2286		/*
2287		 * Has the search wrapped around and come back to the
2288		 * start ?
2289		 */
2290		if ((natl != NULL) &&
2291		    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2292		    (np->in_nip != 0) && (st_ip == np->in_nip))
2293			return -1;
2294		l++;
2295	} while (natl != NULL);
2296
2297	if (np->in_space > 0)
2298		np->in_space--;
2299
2300	/* Setup the NAT table */
2301	nat->nat_inip = fin->fin_src;
2302	nat->nat_outip.s_addr = htonl(in.s_addr);
2303	nat->nat_oip = fin->fin_dst;
2304	if (nat->nat_hm == NULL)
2305		nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2306					  nat->nat_outip, 0, ifs);
2307
2308	if (flags & IPN_TCPUDP) {
2309		nat->nat_inport = sport;
2310		nat->nat_outport = port;	/* sport */
2311		nat->nat_oport = dport;
2312		((tcphdr_t *)fin->fin_dp)->th_sport = port;
2313	} else if (flags & IPN_ICMPQUERY) {
2314		((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2315		nat->nat_inport = port;
2316		nat->nat_outport = port;
2317	}
2318
2319	ni->nai_ip.s_addr = in.s_addr;
2320	ni->nai_port = port;
2321	ni->nai_nport = dport;
2322	return 0;
2323}
2324
2325
2326/* ------------------------------------------------------------------------ */
2327/* Function:    nat_newrdr                                                  */
2328/* Returns:     int - -1 == error, 0 == success (no move), 1 == success and */
2329/*                    allow rule to be moved if IPN_ROUNDR is set.          */
2330/* Parameters:  fin(I) - pointer to packet information                      */
2331/*              nat(I) - pointer to NAT entry                               */
2332/*              ni(I)  - pointer to structure with misc. information needed */
2333/*                       to create new NAT entry.                           */
2334/*                                                                          */
2335/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2336/* to the new IP address for the translation.                               */
2337/* ------------------------------------------------------------------------ */
2338static INLINE int nat_newrdr(fin, nat, ni)
2339fr_info_t *fin;
2340nat_t *nat;
2341natinfo_t *ni;
2342{
2343	u_short nport, dport, sport;
2344	struct in_addr in, inb;
2345	u_short sp, dp;
2346	hostmap_t *hm;
2347	u_32_t flags;
2348	ipnat_t *np;
2349	nat_t *natl;
2350	int move;
2351	ipf_stack_t *ifs = fin->fin_ifs;
2352
2353	move = 1;
2354	hm = NULL;
2355	in.s_addr = 0;
2356	np = ni->nai_np;
2357	flags = ni->nai_flags;
2358	sport = ni->nai_sport;
2359	dport = ni->nai_dport;
2360
2361	/*
2362	 * If the matching rule has IPN_STICKY set, then we want to have the
2363	 * same rule kick in as before.  Why would this happen?  If you have
2364	 * a collection of rdr rules with "round-robin sticky", the current
2365	 * packet might match a different one to the previous connection but
2366	 * we want the same destination to be used.
2367	 */
2368	if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2369	    (IPN_ROUNDR|IPN_STICKY)) {
2370		hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2371				 (u_32_t)dport, ifs);
2372		if (hm != NULL) {
2373			in.s_addr = ntohl(hm->hm_mapip.s_addr);
2374			np = hm->hm_ipnat;
2375			ni->nai_np = np;
2376			move = 0;
2377		}
2378	}
2379
2380	/*
2381	 * Otherwise, it's an inbound packet. Most likely, we don't
2382	 * want to rewrite source ports and source addresses. Instead,
2383	 * we want to rewrite to a fixed internal address and fixed
2384	 * internal port.
2385	 */
2386	if (np->in_flags & IPN_SPLIT) {
2387		in.s_addr = np->in_nip;
2388
2389		if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2390			hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2391					 in, (u_32_t)dport, ifs);
2392			if (hm != NULL) {
2393				in.s_addr = hm->hm_mapip.s_addr;
2394				move = 0;
2395			}
2396		}
2397
2398		if (hm == NULL || hm->hm_ref == 1) {
2399			if (np->in_inip == htonl(in.s_addr)) {
2400				np->in_nip = ntohl(np->in_inmsk);
2401				move = 0;
2402			} else {
2403				np->in_nip = ntohl(np->in_inip);
2404			}
2405		}
2406
2407	} else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2408		/*
2409		 * 0/32 - use the interface's IP address.
2410		 */
2411		if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2412			   fin->fin_ifs) == -1)
2413			return -1;
2414		in.s_addr = ntohl(in.s_addr);
2415
2416	} else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2417		/*
2418		 * 0/0 - use the original destination address/port.
2419		 */
2420		in.s_addr = ntohl(fin->fin_daddr);
2421
2422	} else if (np->in_redir == NAT_BIMAP &&
2423		   np->in_inmsk == np->in_outmsk) {
2424		/*
2425		 * map the address block in a 1:1 fashion
2426		 */
2427		in.s_addr = np->in_inip;
2428		in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2429		in.s_addr = ntohl(in.s_addr);
2430	} else {
2431		in.s_addr = ntohl(np->in_inip);
2432	}
2433
2434	if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2435		nport = dport;
2436	else {
2437		/*
2438		 * Whilst not optimized for the case where
2439		 * pmin == pmax, the gain is not significant.
2440		 */
2441		if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2442		    (np->in_pmin != np->in_pmax)) {
2443			nport = ntohs(dport) - ntohs(np->in_pmin) +
2444				ntohs(np->in_pnext);
2445			nport = htons(nport);
2446		} else
2447			nport = np->in_pnext;
2448	}
2449
2450	/*
2451	 * When the redirect-to address is set to 0.0.0.0, just
2452	 * assume a blank `forwarding' of the packet.  We don't
2453	 * setup any translation for this either.
2454	 */
2455	if (in.s_addr == 0) {
2456		if (nport == dport)
2457			return -1;
2458		in.s_addr = ntohl(fin->fin_daddr);
2459	}
2460
2461	/*
2462	 * Check to see if this redirect mapping already exists and if
2463	 * it does, return "failure" (allowing it to be created will just
2464	 * cause one or both of these "connections" to stop working.)
2465	 */
2466	inb.s_addr = htonl(in.s_addr);
2467	sp = fin->fin_data[0];
2468	dp = fin->fin_data[1];
2469	fin->fin_data[1] = fin->fin_data[0];
2470	fin->fin_data[0] = ntohs(nport);
2471	natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2472		    (u_int)fin->fin_p, inb, fin->fin_src);
2473	fin->fin_data[0] = sp;
2474	fin->fin_data[1] = dp;
2475	if (natl != NULL)
2476		return (-1);
2477
2478	nat->nat_inip.s_addr = htonl(in.s_addr);
2479	nat->nat_outip = fin->fin_dst;
2480	nat->nat_oip = fin->fin_src;
2481
2482	ni->nai_ip.s_addr = in.s_addr;
2483	ni->nai_nport = nport;
2484	ni->nai_port = sport;
2485
2486	if (flags & IPN_TCPUDP) {
2487		nat->nat_inport = nport;
2488		nat->nat_outport = dport;
2489		nat->nat_oport = sport;
2490		((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2491	} else if (flags & IPN_ICMPQUERY) {
2492		((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2493		nat->nat_inport = nport;
2494		nat->nat_outport = nport;
2495	}
2496
2497	return move;
2498}
2499
2500/* ------------------------------------------------------------------------ */
2501/* Function:    nat_new                                                     */
2502/* Returns:     nat_t* - NULL == failure to create new NAT structure,       */
2503/*                       else pointer to new NAT structure                  */
2504/* Parameters:  fin(I)       - pointer to packet information                */
2505/*              np(I)        - pointer to NAT rule                          */
2506/*              natsave(I)   - pointer to where to store NAT struct pointer */
2507/*              flags(I)     - flags describing the current packet          */
2508/*              direction(I) - direction of packet (in/out)                 */
2509/* Write Lock:  ipf_nat                                                     */
2510/*                                                                          */
2511/* Attempts to create a new NAT entry.  Does not actually change the packet */
2512/* in any way.                                                              */
2513/*                                                                          */
2514/* This fucntion is in three main parts: (1) deal with creating a new NAT   */
2515/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with     */
2516/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2517/* and (3) building that structure and putting it into the NAT table(s).    */
2518/* ------------------------------------------------------------------------ */
2519nat_t *nat_new(fin, np, natsave, flags, direction)
2520fr_info_t *fin;
2521ipnat_t *np;
2522nat_t **natsave;
2523u_int flags;
2524int direction;
2525{
2526	tcphdr_t *tcp = NULL;
2527	hostmap_t *hm = NULL;
2528	nat_t *nat, *natl;
2529	u_int nflags;
2530	natinfo_t ni;
2531	int move;
2532	ipf_stack_t *ifs = fin->fin_ifs;
2533
2534	/*
2535	 * Trigger automatic call to nat_flushtable() if the
2536	 * table has reached capcity specified by hi watermark.
2537	 */
2538	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
2539		ifs->ifs_nat_doflush = 1;
2540
2541	/*
2542	 * If automatic flushing did not do its job, and the table
2543	 * has filled up, don't try to create a new entry.
2544	 */
2545	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2546		ifs->ifs_nat_stats.ns_memfail++;
2547		return NULL;
2548	}
2549
2550	move = 1;
2551	nflags = np->in_flags & flags;
2552	nflags &= NAT_FROMRULE;
2553
2554	ni.nai_np = np;
2555	ni.nai_nflags = nflags;
2556	ni.nai_flags = flags;
2557
2558	/* Give me a new nat */
2559	KMALLOC(nat, nat_t *);
2560	if (nat == NULL) {
2561		ifs->ifs_nat_stats.ns_memfail++;
2562		/*
2563		 * Try to automatically tune the max # of entries in the
2564		 * table allowed to be less than what will cause kmem_alloc()
2565		 * to fail and try to eliminate panics due to out of memory
2566		 * conditions arising.
2567		 */
2568		if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2569			ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2570			printf("ipf_nattable_max reduced to %d\n",
2571				ifs->ifs_ipf_nattable_max);
2572		}
2573		return NULL;
2574	}
2575
2576	if (flags & IPN_TCPUDP) {
2577		tcp = fin->fin_dp;
2578		ni.nai_sport = htons(fin->fin_sport);
2579		ni.nai_dport = htons(fin->fin_dport);
2580	} else if (flags & IPN_ICMPQUERY) {
2581		/*
2582		 * In the ICMP query NAT code, we translate the ICMP id fields
2583		 * to make them unique. This is indepedent of the ICMP type
2584		 * (e.g. in the unlikely event that a host sends an echo and
2585		 * an tstamp request with the same id, both packets will have
2586		 * their ip address/id field changed in the same way).
2587		 */
2588		/* The icmp_id field is used by the sender to identify the
2589		 * process making the icmp request. (the receiver justs
2590		 * copies it back in its response). So, it closely matches
2591		 * the concept of source port. We overlay sport, so we can
2592		 * maximally reuse the existing code.
2593		 */
2594		ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2595		ni.nai_dport = ni.nai_sport;
2596	}
2597
2598	bzero((char *)nat, sizeof(*nat));
2599	nat->nat_flags = flags;
2600	nat->nat_redir = np->in_redir;
2601
2602	if ((flags & NAT_SLAVE) == 0) {
2603		MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2604	}
2605
2606	/*
2607	 * Search the current table for a match.
2608	 */
2609	if (direction == NAT_OUTBOUND) {
2610		/*
2611		 * We can now arrange to call this for the same connection
2612		 * because ipf_nat_new doesn't protect the code path into
2613		 * this function.
2614		 */
2615		natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2616				     fin->fin_src, fin->fin_dst);
2617		if (natl != NULL) {
2618			KFREE(nat);
2619			nat = natl;
2620			goto done;
2621		}
2622
2623		move = nat_newmap(fin, nat, &ni);
2624		if (move == -1)
2625			goto badnat;
2626
2627		np = ni.nai_np;
2628	} else {
2629		/*
2630		 * NAT_INBOUND is used only for redirects rules
2631		 */
2632		natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2633				    fin->fin_src, fin->fin_dst);
2634		if (natl != NULL) {
2635			KFREE(nat);
2636			nat = natl;
2637			goto done;
2638		}
2639
2640		move = nat_newrdr(fin, nat, &ni);
2641		if (move == -1)
2642			goto badnat;
2643
2644		np = ni.nai_np;
2645	}
2646
2647	if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2648		if (np->in_redir == NAT_REDIRECT) {
2649			nat_delrdr(np);
2650			nat_addrdr(np, ifs);
2651		} else if (np->in_redir == NAT_MAP) {
2652			nat_delnat(np);
2653			nat_addnat(np, ifs);
2654		}
2655	}
2656
2657	if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2658		goto badnat;
2659	}
2660
2661	nat_calc_chksum_diffs(nat);
2662
2663	if (flags & SI_WILDP)
2664		ifs->ifs_nat_stats.ns_wilds++;
2665	fin->fin_flx |= FI_NEWNAT;
2666	goto done;
2667badnat:
2668	ifs->ifs_nat_stats.ns_badnat++;
2669	if ((hm = nat->nat_hm) != NULL)
2670		fr_hostmapdel(&hm);
2671	KFREE(nat);
2672	nat = NULL;
2673done:
2674	if ((flags & NAT_SLAVE) == 0) {
2675		MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2676	}
2677	return nat;
2678}
2679
2680
2681/* ------------------------------------------------------------------------ */
2682/* Function:    nat_finalise                                                */
2683/* Returns:     int - 0 == sucess, -1 == failure                            */
2684/* Parameters:  fin(I) - pointer to packet information                      */
2685/*              nat(I) - pointer to NAT entry                               */
2686/*              ni(I)  - pointer to structure with misc. information needed */
2687/*                       to create new NAT entry.                           */
2688/* Write Lock:  ipf_nat                                                     */
2689/*                                                                          */
2690/* This is the tail end of constructing a new NAT entry and is the same     */
2691/* for both IPv4 and IPv6.                                                  */
2692/* ------------------------------------------------------------------------ */
2693/*ARGSUSED*/
2694static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2695fr_info_t *fin;
2696nat_t *nat;
2697natinfo_t *ni;
2698tcphdr_t *tcp;
2699nat_t **natsave;
2700int direction;
2701{
2702	frentry_t *fr;
2703	ipnat_t *np;
2704	ipf_stack_t *ifs = fin->fin_ifs;
2705
2706	np = ni->nai_np;
2707
2708	COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2709
2710#ifdef	IPFILTER_SYNC
2711	if ((nat->nat_flags & SI_CLONE) == 0)
2712		nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2713#endif
2714
2715	nat->nat_me = natsave;
2716	nat->nat_dir = direction;
2717	nat->nat_ifps[0] = np->in_ifps[0];
2718	nat->nat_ifps[1] = np->in_ifps[1];
2719	nat->nat_ptr = np;
2720	nat->nat_p = fin->fin_p;
2721	nat->nat_v = fin->fin_v;
2722	nat->nat_mssclamp = np->in_mssclamp;
2723	fr = fin->fin_fr;
2724	nat->nat_fr = fr;
2725
2726	if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2727		if (appr_new(fin, nat) == -1)
2728			return -1;
2729
2730	if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2731		if (ifs->ifs_nat_logging)
2732			nat_log(nat, (u_int)np->in_redir, ifs);
2733		np->in_use++;
2734		if (fr != NULL) {
2735			MUTEX_ENTER(&fr->fr_lock);
2736			fr->fr_ref++;
2737			MUTEX_EXIT(&fr->fr_lock);
2738		}
2739		return 0;
2740	}
2741
2742	/*
2743	 * nat_insert failed, so cleanup time...
2744	 */
2745	return -1;
2746}
2747
2748
2749/* ------------------------------------------------------------------------ */
2750/* Function:   nat_insert                                                   */
2751/* Returns:    int - 0 == sucess, -1 == failure                             */
2752/* Parameters: nat(I) - pointer to NAT structure                            */
2753/*             rev(I) - flag indicating forward/reverse direction of packet */
2754/* Write Lock: ipf_nat                                                      */
2755/*                                                                          */
2756/* Insert a NAT entry into the hash tables for searching and add it to the  */
2757/* list of active NAT entries.  Adjust global counters when complete.       */
2758/* ------------------------------------------------------------------------ */
2759int	nat_insert(nat, rev, ifs)
2760nat_t	*nat;
2761int	rev;
2762ipf_stack_t *ifs;
2763{
2764	u_int hv1, hv2;
2765	nat_t **natp;
2766
2767	/*
2768	 * Try and return an error as early as possible, so calculate the hash
2769	 * entry numbers first and then proceed.
2770	 */
2771	if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2772		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2773				  0xffffffff);
2774		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2775				  ifs->ifs_ipf_nattable_sz);
2776		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2777				  0xffffffff);
2778		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2779				  ifs->ifs_ipf_nattable_sz);
2780	} else {
2781		hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2782		hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2783				  ifs->ifs_ipf_nattable_sz);
2784		hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2785		hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2786				  ifs->ifs_ipf_nattable_sz);
2787	}
2788
2789	if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2790	    ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2791		return -1;
2792	}
2793
2794	nat->nat_hv[0] = hv1;
2795	nat->nat_hv[1] = hv2;
2796
2797	MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2798
2799	nat->nat_rev = rev;
2800	nat->nat_ref = 1;
2801	nat->nat_bytes[0] = 0;
2802	nat->nat_pkts[0] = 0;
2803	nat->nat_bytes[1] = 0;
2804	nat->nat_pkts[1] = 0;
2805
2806	nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2807	nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2808
2809	if (nat->nat_ifnames[1][0] !='\0') {
2810		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2811		nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2812	} else {
2813		(void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2814			       LIFNAMSIZ);
2815		nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2816		nat->nat_ifps[1] = nat->nat_ifps[0];
2817	}
2818
2819	nat->nat_next = ifs->ifs_nat_instances;
2820	nat->nat_pnext = &ifs->ifs_nat_instances;
2821	if (ifs->ifs_nat_instances)
2822		ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2823	ifs->ifs_nat_instances = nat;
2824
2825	natp = &ifs->ifs_nat_table[0][hv1];
2826	if (*natp)
2827		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2828	nat->nat_phnext[0] = natp;
2829	nat->nat_hnext[0] = *natp;
2830	*natp = nat;
2831	ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2832
2833	natp = &ifs->ifs_nat_table[1][hv2];
2834	if (*natp)
2835		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2836	nat->nat_phnext[1] = natp;
2837	nat->nat_hnext[1] = *natp;
2838	*natp = nat;
2839	ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2840
2841	fr_setnatqueue(nat, rev, ifs);
2842
2843	ifs->ifs_nat_stats.ns_added++;
2844	ifs->ifs_nat_stats.ns_inuse++;
2845	return 0;
2846}
2847
2848
2849/* ------------------------------------------------------------------------ */
2850/* Function:    nat_icmperrorlookup                                         */
2851/* Returns:     nat_t* - point to matching NAT structure                    */
2852/* Parameters:  fin(I) - pointer to packet information                      */
2853/*              dir(I) - direction of packet (in/out)                       */
2854/*                                                                          */
2855/* Check if the ICMP error message is related to an existing TCP, UDP or    */
2856/* ICMP query nat entry.  It is assumed that the packet is already of the   */
2857/* the required length.                                                     */
2858/* ------------------------------------------------------------------------ */
2859nat_t *nat_icmperrorlookup(fin, dir)
2860fr_info_t *fin;
2861int dir;
2862{
2863	int flags = 0, minlen;
2864	icmphdr_t *orgicmp;
2865	tcphdr_t *tcp = NULL;
2866	u_short data[2];
2867	nat_t *nat;
2868	ip_t *oip;
2869	u_int p;
2870
2871	/*
2872	 * Does it at least have the return (basic) IP header ?
2873	 * Only a basic IP header (no options) should be with an ICMP error
2874	 * header.  Also, if it's not an error type, then return.
2875	 */
2876	if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2877		return NULL;
2878
2879	/*
2880	 * Check packet size
2881	 */
2882	oip = (ip_t *)((char *)fin->fin_dp + 8);
2883	minlen = IP_HL(oip) << 2;
2884	if ((minlen < sizeof(ip_t)) ||
2885	    (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2886		return NULL;
2887	/*
2888	 * Is the buffer big enough for all of it ?  It's the size of the IP
2889	 * header claimed in the encapsulated part which is of concern.  It
2890	 * may be too big to be in this buffer but not so big that it's
2891	 * outside the ICMP packet, leading to TCP deref's causing problems.
2892	 * This is possible because we don't know how big oip_hl is when we
2893	 * do the pullup early in fr_check() and thus can't gaurantee it is
2894	 * all here now.
2895	 */
2896#ifdef  _KERNEL
2897	{
2898	mb_t *m;
2899
2900	m = fin->fin_m;
2901# if defined(MENTAT)
2902	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2903		return NULL;
2904# else
2905	if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2906	    (char *)fin->fin_ip + M_LEN(m))
2907		return NULL;
2908# endif
2909	}
2910#endif
2911
2912	if (fin->fin_daddr != oip->ip_src.s_addr)
2913		return NULL;
2914
2915	p = oip->ip_p;
2916	if (p == IPPROTO_TCP)
2917		flags = IPN_TCP;
2918	else if (p == IPPROTO_UDP)
2919		flags = IPN_UDP;
2920	else if (p == IPPROTO_ICMP) {
2921		orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2922
2923		/* see if this is related to an ICMP query */
2924		if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2925			data[0] = fin->fin_data[0];
2926			data[1] = fin->fin_data[1];
2927			fin->fin_data[0] = 0;
2928			fin->fin_data[1] = orgicmp->icmp_id;
2929
2930			flags = IPN_ICMPERR|IPN_ICMPQUERY;
2931			/*
2932			 * NOTE : dir refers to the direction of the original
2933			 *        ip packet. By definition the icmp error
2934			 *        message flows in the opposite direction.
2935			 */
2936			if (dir == NAT_INBOUND)
2937				nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2938						   oip->ip_src);
2939			else
2940				nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2941						    oip->ip_src);
2942			fin->fin_data[0] = data[0];
2943			fin->fin_data[1] = data[1];
2944			return nat;
2945		}
2946	}
2947
2948	if (flags & IPN_TCPUDP) {
2949		minlen += 8;		/* + 64bits of data to get ports */
2950		if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2951			return NULL;
2952
2953		data[0] = fin->fin_data[0];
2954		data[1] = fin->fin_data[1];
2955		tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2956		fin->fin_data[0] = ntohs(tcp->th_dport);
2957		fin->fin_data[1] = ntohs(tcp->th_sport);
2958
2959		if (dir == NAT_INBOUND) {
2960			nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2961					   oip->ip_src);
2962		} else {
2963			nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2964					    oip->ip_src);
2965		}
2966		fin->fin_data[0] = data[0];
2967		fin->fin_data[1] = data[1];
2968		return nat;
2969	}
2970	if (dir == NAT_INBOUND)
2971		return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2972	else
2973		return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2974}
2975
2976
2977/* ------------------------------------------------------------------------ */
2978/* Function:    nat_icmperror                                               */
2979/* Returns:     nat_t* - point to matching NAT structure                    */
2980/* Parameters:  fin(I)    - pointer to packet information                   */
2981/*              nflags(I) - NAT flags for this packet                       */
2982/*              dir(I)    - direction of packet (in/out)                    */
2983/*                                                                          */
2984/* Fix up an ICMP packet which is an error message for an existing NAT      */
2985/* session.  This will correct both packet header data and checksums.       */
2986/*                                                                          */
2987/* This should *ONLY* be used for incoming ICMP error packets to make sure  */
2988/* a NAT'd ICMP packet gets correctly recognised.                           */
2989/* ------------------------------------------------------------------------ */
2990nat_t *nat_icmperror(fin, nflags, dir)
2991fr_info_t *fin;
2992u_int *nflags;
2993int dir;
2994{
2995	u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2996	struct in_addr in;
2997	icmphdr_t *icmp, *orgicmp;
2998	int dlen;
2999	udphdr_t *udp;
3000	tcphdr_t *tcp;
3001	nat_t *nat;
3002	ip_t *oip;
3003	if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
3004		return NULL;
3005
3006	/*
3007	 * nat_icmperrorlookup() looks up nat entry associated with the
3008	 * offending IP packet and returns pointer to the entry, or NULL
3009	 * if packet wasn't natted or for `defective' packets.
3010	 */
3011
3012	if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
3013		return NULL;
3014
3015	sumd2 = 0;
3016	*nflags = IPN_ICMPERR;
3017	icmp = fin->fin_dp;
3018	oip = (ip_t *)&icmp->icmp_ip;
3019	udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3020	tcp = (tcphdr_t *)udp;
3021	dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3022
3023	/*
3024	 * Need to adjust ICMP header to include the real IP#'s and
3025	 * port #'s.  There are three steps required.
3026	 *
3027	 * Step 1
3028	 * Fix the IP addresses in the offending IP packet and update
3029	 * ip header checksum to compensate for the change.
3030	 *
3031	 * No update needed here for icmp_cksum because the ICMP checksum
3032	 * is calculated over the complete ICMP packet, which includes the
3033	 * changed oip IP addresses and oip->ip_sum.  These two changes
3034	 * cancel each other out (if the delta for the IP address is x,
3035	 * then the delta for ip_sum is minus x).
3036	 */
3037
3038	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3039		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3040		in = nat->nat_inip;
3041		oip->ip_src = in;
3042	} else {
3043		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3044		in = nat->nat_outip;
3045		oip->ip_dst = in;
3046	}
3047
3048	sum2 = LONG_SUM(ntohl(in.s_addr));
3049	CALC_SUMD(sum1, sum2, sumd);
3050	fix_datacksum(&oip->ip_sum, sumd);
3051
3052	/*
3053	 * Step 2
3054	 * Perform other adjustments based on protocol of offending packet.
3055	 */
3056
3057	switch (oip->ip_p) {
3058		case IPPROTO_TCP :
3059		case IPPROTO_UDP :
3060
3061			/*
3062			* For offending TCP/UDP IP packets, translate the ports
3063			* based on the NAT specification.
3064			*
3065			* Advance notice : Now it becomes complicated :-)
3066			*
3067			* Since the port and IP addresse fields are both part
3068			* of the TCP/UDP checksum of the offending IP packet,
3069			* we need to adjust that checksum as well.
3070			*
3071			* To further complicate things, the TCP/UDP checksum
3072			* may not be present.  We must check to see if the
3073			* length of the data portion is big enough to hold
3074			* the checksum.  In the UDP case, a test to determine
3075			* if the checksum is even set is also required.
3076			*
3077			* Any changes to an IP address, port or checksum within
3078			* the ICMP packet requires a change to icmp_cksum.
3079			*
3080			* Be extremely careful here ... The change is dependent
3081			* upon whether or not the TCP/UPD checksum is present.
3082			*
3083			* If TCP/UPD checksum is present, the icmp_cksum must
3084			* compensate for checksum modification resulting from
3085			* IP address change only.  Port change and resulting
3086			* data checksum adjustments cancel each other out.
3087			*
3088			* If TCP/UDP checksum is not present, icmp_cksum must
3089			* compensate for port change only.  The IP address
3090			* change does not modify anything else in this case.
3091			*/
3092
3093			psum1 = 0;
3094			psum2 = 0;
3095			psumd = 0;
3096
3097			if ((tcp->th_dport == nat->nat_oport) &&
3098			    (tcp->th_sport != nat->nat_inport)) {
3099
3100				/*
3101				 * Translate the source port.
3102				 */
3103
3104				psum1 = ntohs(tcp->th_sport);
3105				psum2 = ntohs(nat->nat_inport);
3106				tcp->th_sport = nat->nat_inport;
3107
3108			} else if ((tcp->th_sport == nat->nat_oport) &&
3109				    (tcp->th_dport != nat->nat_outport)) {
3110
3111				/*
3112				 * Translate the destination port.
3113				 */
3114
3115				psum1 = ntohs(tcp->th_dport);
3116				psum2 = ntohs(nat->nat_outport);
3117				tcp->th_dport = nat->nat_outport;
3118			}
3119
3120			if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3121
3122				/*
3123				 * TCP checksum present.
3124				 *
3125				 * Adjust data checksum and icmp checksum to
3126				 * compensate for any IP address change.
3127				 */
3128
3129				sum1 = ntohs(tcp->th_sum);
3130				fix_datacksum(&tcp->th_sum, sumd);
3131				sum2 = ntohs(tcp->th_sum);
3132				sumd2 = sumd << 1;
3133				CALC_SUMD(sum1, sum2, sumd);
3134				sumd2 += sumd;
3135
3136				/*
3137				 * Also make data checksum adjustment to
3138				 * compensate for any port change.
3139				 */
3140
3141				if (psum1 != psum2) {
3142					CALC_SUMD(psum1, psum2, psumd);
3143					fix_datacksum(&tcp->th_sum, psumd);
3144				}
3145
3146			} else if ((oip->ip_p == IPPROTO_UDP) &&
3147				   (dlen >= 8) && (udp->uh_sum != 0)) {
3148
3149				/*
3150				 * The UDP checksum is present and set.
3151				 *
3152				 * Adjust data checksum and icmp checksum to
3153				 * compensate for any IP address change.
3154				 */
3155
3156				sum1 = ntohs(udp->uh_sum);
3157				fix_datacksum(&udp->uh_sum, sumd);
3158				sum2 = ntohs(udp->uh_sum);
3159				sumd2 = sumd << 1;
3160				CALC_SUMD(sum1, sum2, sumd);
3161				sumd2 += sumd;
3162
3163				/*
3164				 * Also make data checksum adjustment to
3165				 * compensate for any port change.
3166				 */
3167
3168				if (psum1 != psum2) {
3169					CALC_SUMD(psum1, psum2, psumd);
3170					fix_datacksum(&udp->uh_sum, psumd);
3171				}
3172
3173			} else {
3174
3175				/*
3176				 * Data checksum was not present.
3177				 *
3178				 * Compensate for any port change.
3179				 */
3180
3181				CALC_SUMD(psum2, psum1, psumd);
3182				sumd2 += psumd;
3183			}
3184			break;
3185
3186		case IPPROTO_ICMP :
3187
3188			orgicmp = (icmphdr_t *)udp;
3189
3190			if ((nat->nat_dir == NAT_OUTBOUND) &&
3191			    (orgicmp->icmp_id != nat->nat_inport) &&
3192			    (dlen >= 8)) {
3193
3194				/*
3195				 * Fix ICMP checksum (of the offening ICMP
3196				 * query packet) to compensate the change
3197				 * in the ICMP id of the offending ICMP
3198				 * packet.
3199				 *
3200				 * Since you modify orgicmp->icmp_id with
3201				 * a delta (say x) and you compensate that
3202				 * in origicmp->icmp_cksum with a delta
3203				 * minus x, you don't have to adjust the
3204				 * overall icmp->icmp_cksum
3205				 */
3206
3207				sum1 = ntohs(orgicmp->icmp_id);
3208				sum2 = ntohs(nat->nat_inport);
3209				CALC_SUMD(sum1, sum2, sumd);
3210				orgicmp->icmp_id = nat->nat_inport;
3211				fix_datacksum(&orgicmp->icmp_cksum, sumd);
3212
3213			} /* nat_dir can't be NAT_INBOUND for icmp queries */
3214
3215			break;
3216
3217		default :
3218
3219			break;
3220
3221	} /* switch (oip->ip_p) */
3222
3223	/*
3224	 * Step 3
3225	 * Make the adjustments to icmp checksum.
3226	 */
3227
3228	if (sumd2 != 0) {
3229		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3230		sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3231		fix_incksum(&icmp->icmp_cksum, sumd2);
3232	}
3233	return nat;
3234}
3235
3236
/*
 * NB: these lookups don't lock access to the list; it is assumed that this
 * has already been done!
 */
3241
3242/* ------------------------------------------------------------------------ */
3243/* Function:    nat_inlookup                                                */
3244/* Returns:     nat_t* - NULL == no match,                                  */
3245/*                       else pointer to matching NAT entry                 */
3246/* Parameters:  fin(I)    - pointer to packet information                   */
3247/*              flags(I)  - NAT flags for this packet                       */
3248/*              p(I)      - protocol for this packet                        */
3249/*              src(I)    - source IP address                               */
3250/*              mapdst(I) - destination IP address                          */
3251/*                                                                          */
3252/* Lookup a nat entry based on the mapped destination ip address/port and   */
3253/* real source address/port.  We use this lookup when receiving a packet,   */
3254/* we're looking for a table entry, based on the destination address.       */
3255/*                                                                          */
3256/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3257/*                                                                          */
3258/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3259/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3260/*                                                                          */
3261/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3262/*            the packet is of said protocol                                */
3263/* ------------------------------------------------------------------------ */
3264nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3265fr_info_t *fin;
3266u_int flags, p;
3267struct in_addr src , mapdst;
3268{
3269	u_short sport, dport;
3270	ipnat_t *ipn;
3271	u_int sflags;
3272	nat_t *nat;
3273	int nflags;
3274	u_32_t dst;
3275	void *ifp;
3276	u_int hv;
3277	ipf_stack_t *ifs = fin->fin_ifs;
3278
3279	if (fin != NULL)
3280		ifp = fin->fin_ifp;
3281	else
3282		ifp = NULL;
3283	sport = 0;
3284	dport = 0;
3285	dst = mapdst.s_addr;
3286	sflags = flags & NAT_TCPUDPICMP;
3287
3288	switch (p)
3289	{
3290	case IPPROTO_TCP :
3291	case IPPROTO_UDP :
3292		sport = htons(fin->fin_data[0]);
3293		dport = htons(fin->fin_data[1]);
3294		break;
3295	case IPPROTO_ICMP :
3296		if (flags & IPN_ICMPERR)
3297			sport = fin->fin_data[1];
3298		else
3299			dport = fin->fin_data[1];
3300		break;
3301	default :
3302		break;
3303	}
3304
3305
3306	if ((flags & SI_WILDP) != 0)
3307		goto find_in_wild_ports;
3308
3309	hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3310	hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3311	nat = ifs->ifs_nat_table[1][hv];
3312	for (; nat; nat = nat->nat_hnext[1]) {
3313		if (nat->nat_v != 4)
3314			continue;
3315
3316		if (nat->nat_ifps[0] != NULL) {
3317			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3318				continue;
3319		} else if (ifp != NULL)
3320			nat->nat_ifps[0] = ifp;
3321
3322		nflags = nat->nat_flags;
3323
3324		if (nat->nat_oip.s_addr == src.s_addr &&
3325		    nat->nat_outip.s_addr == dst &&
3326		    (((p == 0) &&
3327		      (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3328		     || (p == nat->nat_p))) {
3329			switch (p)
3330			{
3331#if 0
3332			case IPPROTO_GRE :
3333				if (nat->nat_call[1] != fin->fin_data[0])
3334					continue;
3335				break;
3336#endif
3337			case IPPROTO_ICMP :
3338				if ((flags & IPN_ICMPERR) != 0) {
3339					if (nat->nat_outport != sport)
3340						continue;
3341				} else {
3342					if (nat->nat_outport != dport)
3343						continue;
3344				}
3345				break;
3346			case IPPROTO_TCP :
3347			case IPPROTO_UDP :
3348				if (nat->nat_oport != sport)
3349					continue;
3350				if (nat->nat_outport != dport)
3351					continue;
3352				break;
3353			default :
3354				break;
3355			}
3356
3357			ipn = nat->nat_ptr;
3358			if ((ipn != NULL) && (nat->nat_aps != NULL))
3359				if (appr_match(fin, nat) != 0)
3360					continue;
3361			return nat;
3362		}
3363	}
3364
3365	/*
3366	 * So if we didn't find it but there are wildcard members in the hash
3367	 * table, go back and look for them.  We do this search and update here
3368	 * because it is modifying the NAT table and we want to do this only
3369	 * for the first packet that matches.  The exception, of course, is
3370	 * for "dummy" (FI_IGNORE) lookups.
3371	 */
3372find_in_wild_ports:
3373	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3374		return NULL;
3375	if (ifs->ifs_nat_stats.ns_wilds == 0)
3376		return NULL;
3377
3378	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3379
3380	hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3381	hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3382
3383	WRITE_ENTER(&ifs->ifs_ipf_nat);
3384
3385	nat = ifs->ifs_nat_table[1][hv];
3386	for (; nat; nat = nat->nat_hnext[1]) {
3387		if (nat->nat_v != 4)
3388			continue;
3389
3390		if (nat->nat_ifps[0] != NULL) {
3391			if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3392				continue;
3393		} else if (ifp != NULL)
3394			nat->nat_ifps[0] = ifp;
3395
3396		if (nat->nat_p != fin->fin_p)
3397			continue;
3398		if (nat->nat_oip.s_addr != src.s_addr ||
3399		    nat->nat_outip.s_addr != dst)
3400			continue;
3401
3402		nflags = nat->nat_flags;
3403		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3404			continue;
3405
3406		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3407			       NAT_INBOUND) == 1) {
3408			if ((fin->fin_flx & FI_IGNORE) != 0)
3409				break;
3410			if ((nflags & SI_CLONE) != 0) {
3411				nat = fr_natclone(fin, nat);
3412				if (nat == NULL)
3413					break;
3414			} else {
3415				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3416				ifs->ifs_nat_stats.ns_wilds--;
3417				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3418			}
3419			nat->nat_oport = sport;
3420			nat->nat_outport = dport;
3421			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3422			nat_tabmove(nat, ifs);
3423			break;
3424		}
3425	}
3426
3427	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3428
3429	return nat;
3430}
3431
3432
3433/* ------------------------------------------------------------------------ */
3434/* Function:    nat_tabmove                                                 */
3435/* Returns:     Nil                                                         */
3436/* Parameters:  nat(I) - pointer to NAT structure                           */
3437/* Write Lock:  ipf_nat                                                     */
3438/*                                                                          */
3439/* This function is only called for TCP/UDP NAT table entries where the     */
3440/* original was placed in the table without hashing on the ports and we now */
3441/* want to include hashing on port numbers.                                 */
3442/* ------------------------------------------------------------------------ */
3443static void nat_tabmove(nat, ifs)
3444nat_t *nat;
3445ipf_stack_t *ifs;
3446{
3447	nat_t **natp;
3448	u_int hv;
3449
3450	if (nat->nat_flags & SI_CLONE)
3451		return;
3452
3453	/*
3454	 * Remove the NAT entry from the old location
3455	 */
3456	if (nat->nat_hnext[0])
3457		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3458	*nat->nat_phnext[0] = nat->nat_hnext[0];
3459	ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3460
3461	if (nat->nat_hnext[1])
3462		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3463	*nat->nat_phnext[1] = nat->nat_hnext[1];
3464	ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3465
3466	/*
3467	 * Add into the NAT table in the new position
3468	 */
3469	hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3470	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3471			 ifs->ifs_ipf_nattable_sz);
3472	nat->nat_hv[0] = hv;
3473	natp = &ifs->ifs_nat_table[0][hv];
3474	if (*natp)
3475		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3476	nat->nat_phnext[0] = natp;
3477	nat->nat_hnext[0] = *natp;
3478	*natp = nat;
3479	ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3480
3481	hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3482	hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3483			 ifs->ifs_ipf_nattable_sz);
3484	nat->nat_hv[1] = hv;
3485	natp = &ifs->ifs_nat_table[1][hv];
3486	if (*natp)
3487		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3488	nat->nat_phnext[1] = natp;
3489	nat->nat_hnext[1] = *natp;
3490	*natp = nat;
3491	ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3492}
3493
3494
3495/* ------------------------------------------------------------------------ */
3496/* Function:    nat_outlookup                                               */
3497/* Returns:     nat_t* - NULL == no match,                                  */
3498/*                       else pointer to matching NAT entry                 */
3499/* Parameters:  fin(I)   - pointer to packet information                    */
3500/*              flags(I) - NAT flags for this packet                        */
3501/*              p(I)     - protocol for this packet                         */
3502/*              src(I)   - source IP address                                */
3503/*              dst(I)   - destination IP address                           */
3504/*              rw(I)    - 1 == write lock on ipf_nat held, 0 == read lock. */
3505/*                                                                          */
3506/* Lookup a nat entry based on the source 'real' ip address/port and        */
3507/* destination address/port.  We use this lookup when sending a packet out, */
3508/* we're looking for a table entry, based on the source address.            */
3509/*                                                                          */
3510/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.         */
3511/*                                                                          */
3512/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN      */
3513/*       THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.             */
3514/*                                                                          */
3515/* flags   -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if   */
3516/*            the packet is of said protocol                                */
3517/* ------------------------------------------------------------------------ */
3518nat_t *nat_outlookup(fin, flags, p, src, dst)
3519fr_info_t *fin;
3520u_int flags, p;
3521struct in_addr src , dst;
3522{
3523	u_short sport, dport;
3524	u_int sflags;
3525	ipnat_t *ipn;
3526	u_32_t srcip;
3527	nat_t *nat;
3528	int nflags;
3529	void *ifp;
3530	u_int hv;
3531	ipf_stack_t *ifs = fin->fin_ifs;
3532
3533	ifp = fin->fin_ifp;
3534
3535	srcip = src.s_addr;
3536	sflags = flags & IPN_TCPUDPICMP;
3537	sport = 0;
3538	dport = 0;
3539
3540	switch (p)
3541	{
3542	case IPPROTO_TCP :
3543	case IPPROTO_UDP :
3544		sport = htons(fin->fin_data[0]);
3545		dport = htons(fin->fin_data[1]);
3546		break;
3547	case IPPROTO_ICMP :
3548		if (flags & IPN_ICMPERR)
3549			sport = fin->fin_data[1];
3550		else
3551			dport = fin->fin_data[1];
3552		break;
3553	default :
3554		break;
3555	}
3556
3557	if ((flags & SI_WILDP) != 0)
3558		goto find_out_wild_ports;
3559
3560	hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3561	hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3562	nat = ifs->ifs_nat_table[0][hv];
3563	for (; nat; nat = nat->nat_hnext[0]) {
3564		if (nat->nat_v != 4)
3565			continue;
3566
3567		if (nat->nat_ifps[1] != NULL) {
3568			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3569				continue;
3570		} else if (ifp != NULL)
3571			nat->nat_ifps[1] = ifp;
3572
3573		nflags = nat->nat_flags;
3574
3575		if (nat->nat_inip.s_addr == srcip &&
3576		    nat->nat_oip.s_addr == dst.s_addr &&
3577		    (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3578		     || (p == nat->nat_p))) {
3579			switch (p)
3580			{
3581#if 0
3582			case IPPROTO_GRE :
3583				if (nat->nat_call[1] != fin->fin_data[0])
3584					continue;
3585				break;
3586#endif
3587			case IPPROTO_TCP :
3588			case IPPROTO_UDP :
3589				if (nat->nat_oport != dport)
3590					continue;
3591				if (nat->nat_inport != sport)
3592					continue;
3593				break;
3594			default :
3595				break;
3596			}
3597
3598			ipn = nat->nat_ptr;
3599			if ((ipn != NULL) && (nat->nat_aps != NULL))
3600				if (appr_match(fin, nat) != 0)
3601					continue;
3602			return nat;
3603		}
3604	}
3605
3606	/*
3607	 * So if we didn't find it but there are wildcard members in the hash
3608	 * table, go back and look for them.  We do this search and update here
3609	 * because it is modifying the NAT table and we want to do this only
3610	 * for the first packet that matches.  The exception, of course, is
3611	 * for "dummy" (FI_IGNORE) lookups.
3612	 */
3613find_out_wild_ports:
3614	if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3615		return NULL;
3616	if (ifs->ifs_nat_stats.ns_wilds == 0)
3617		return NULL;
3618
3619	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3620
3621	hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3622	hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3623
3624	WRITE_ENTER(&ifs->ifs_ipf_nat);
3625
3626	nat = ifs->ifs_nat_table[0][hv];
3627	for (; nat; nat = nat->nat_hnext[0]) {
3628		if (nat->nat_v != 4)
3629			continue;
3630
3631		if (nat->nat_ifps[1] != NULL) {
3632			if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3633				continue;
3634		} else if (ifp != NULL)
3635			nat->nat_ifps[1] = ifp;
3636
3637		if (nat->nat_p != fin->fin_p)
3638			continue;
3639		if ((nat->nat_inip.s_addr != srcip) ||
3640		    (nat->nat_oip.s_addr != dst.s_addr))
3641			continue;
3642
3643		nflags = nat->nat_flags;
3644		if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3645			continue;
3646
3647		if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3648			       NAT_OUTBOUND) == 1) {
3649			if ((fin->fin_flx & FI_IGNORE) != 0)
3650				break;
3651			if ((nflags & SI_CLONE) != 0) {
3652				nat = fr_natclone(fin, nat);
3653				if (nat == NULL)
3654					break;
3655			} else {
3656				MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3657				ifs->ifs_nat_stats.ns_wilds--;
3658				MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3659			}
3660			nat->nat_inport = sport;
3661			nat->nat_oport = dport;
3662			if (nat->nat_outport == 0)
3663				nat->nat_outport = sport;
3664			nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3665			nat_tabmove(nat, ifs);
3666			break;
3667		}
3668	}
3669
3670	MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3671
3672	return nat;
3673}
3674
3675
3676/* ------------------------------------------------------------------------ */
3677/* Function:    nat_lookupredir                                             */
3678/* Returns:     nat_t* - NULL == no match,                                  */
3679/*                       else pointer to matching NAT entry                 */
3680/* Parameters:  np(I) - pointer to description of packet to find NAT table  */
3681/*                      entry for.                                          */
3682/*                                                                          */
3683/* Lookup the NAT tables to search for a matching redirect                  */
3684/* ------------------------------------------------------------------------ */
3685nat_t *nat_lookupredir(np, ifs)
3686natlookup_t *np;
3687ipf_stack_t *ifs;
3688{
3689	fr_info_t fi;
3690	nat_t *nat;
3691
3692	bzero((char *)&fi, sizeof(fi));
3693	if (np->nl_flags & IPN_IN) {
3694		fi.fin_data[0] = ntohs(np->nl_realport);
3695		fi.fin_data[1] = ntohs(np->nl_outport);
3696	} else {
3697		fi.fin_data[0] = ntohs(np->nl_inport);
3698		fi.fin_data[1] = ntohs(np->nl_outport);
3699	}
3700	if (np->nl_flags & IPN_TCP)
3701		fi.fin_p = IPPROTO_TCP;
3702	else if (np->nl_flags & IPN_UDP)
3703		fi.fin_p = IPPROTO_UDP;
3704	else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3705		fi.fin_p = IPPROTO_ICMP;
3706
3707	fi.fin_ifs = ifs;
3708	/*
3709	 * We can do two sorts of lookups:
3710	 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3711	 * - default: we have the `in' and `out' address, look for `real'.
3712	 */
3713	if (np->nl_flags & IPN_IN) {
3714		if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3715					np->nl_realip, np->nl_outip))) {
3716			np->nl_inip = nat->nat_inip;
3717			np->nl_inport = nat->nat_inport;
3718		}
3719	} else {
3720		/*
3721		 * If nl_inip is non null, this is a lookup based on the real
3722		 * ip address. Else, we use the fake.
3723		 */
3724		if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3725					 np->nl_inip, np->nl_outip))) {
3726
3727			if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3728				fr_info_t fin;
3729				bzero((char *)&fin, sizeof(fin));
3730				fin.fin_p = nat->nat_p;
3731				fin.fin_data[0] = ntohs(nat->nat_outport);
3732				fin.fin_data[1] = ntohs(nat->nat_oport);
3733				fin.fin_ifs = ifs;
3734				if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3735						 nat->nat_outip,
3736						 nat->nat_oip) != NULL) {
3737					np->nl_flags &= ~IPN_FINDFORWARD;
3738				}
3739			}
3740
3741			np->nl_realip = nat->nat_outip;
3742			np->nl_realport = nat->nat_outport;
3743		}
3744 	}
3745
3746	return nat;
3747}
3748
3749
3750/* ------------------------------------------------------------------------ */
3751/* Function:    nat_match                                                   */
3752/* Returns:     int - 0 == no match, 1 == match                             */
3753/* Parameters:  fin(I)   - pointer to packet information                    */
3754/*              np(I)    - pointer to NAT rule                              */
3755/*                                                                          */
3756/* Pull the matching of a packet against a NAT rule out of that complex     */
3757/* loop inside fr_checknatin() and lay it out properly in its own function. */
3758/* ------------------------------------------------------------------------ */
3759static int nat_match(fin, np)
3760fr_info_t *fin;
3761ipnat_t *np;
3762{
3763	frtuc_t *ft;
3764
3765	if (fin->fin_v != 4)
3766		return 0;
3767
3768	if (np->in_p && fin->fin_p != np->in_p)
3769		return 0;
3770
3771	if (fin->fin_out) {
3772		if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3773			return 0;
3774		if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3775		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3776			return 0;
3777		if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3778		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3779			return 0;
3780	} else {
3781		if (!(np->in_redir & NAT_REDIRECT))
3782			return 0;
3783		if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3784		    ^ ((np->in_flags & IPN_NOTSRC) != 0))
3785			return 0;
3786		if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3787		    ^ ((np->in_flags & IPN_NOTDST) != 0))
3788			return 0;
3789	}
3790
3791	ft = &np->in_tuc;
3792	if (!(fin->fin_flx & FI_TCPUDP) ||
3793	    (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3794		if (ft->ftu_scmp || ft->ftu_dcmp)
3795			return 0;
3796		return 1;
3797	}
3798
3799	return fr_tcpudpchk(fin, ft);
3800}
3801
3802
3803/* ------------------------------------------------------------------------ */
3804/* Function:    nat_update                                                  */
3805/* Returns:     Nil                                                         */
3806/* Parameters:	fin(I) - pointer to packet information			    */
3807/*		nat(I) - pointer to NAT structure			    */
3808/*              np(I)     - pointer to NAT rule                             */
3809/* Locks:	nat_lock						    */
3810/*                                                                          */
3811/* Updates the lifetime of a NAT table entry for non-TCP packets.  Must be  */
3812/* called with fin_rev updated - i.e. after calling nat_proto().            */
3813/* ------------------------------------------------------------------------ */
3814void nat_update(fin, nat, np)
3815fr_info_t *fin;
3816nat_t *nat;
3817ipnat_t *np;
3818{
3819	ipftq_t *ifq, *ifq2;
3820	ipftqent_t *tqe;
3821	ipf_stack_t *ifs = fin->fin_ifs;
3822
3823	tqe = &nat->nat_tqe;
3824	ifq = tqe->tqe_ifq;
3825
3826	/*
3827	 * We allow over-riding of NAT timeouts from NAT rules, even for
3828	 * TCP, however, if it is TCP and there is no rule timeout set,
3829	 * then do not update the timeout here.
3830	 */
3831	if (np != NULL)
3832		ifq2 = np->in_tqehead[fin->fin_rev];
3833	else
3834		ifq2 = NULL;
3835
3836	if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3837		(void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3838	} else {
3839		if (ifq2 == NULL) {
3840			if (nat->nat_p == IPPROTO_UDP)
3841				ifq2 = &ifs->ifs_nat_udptq;
3842			else if (nat->nat_p == IPPROTO_ICMP)
3843				ifq2 = &ifs->ifs_nat_icmptq;
3844			else
3845				ifq2 = &ifs->ifs_nat_iptq;
3846		}
3847
3848		fr_movequeue(tqe, ifq, ifq2, ifs);
3849	}
3850}
3851
3852
3853/* ------------------------------------------------------------------------ */
3854/* Function:    fr_checknatout                                              */
3855/* Returns:     int - -1 == packet failed NAT checks so block it,           */
3856/*                     0 == no packet translation occurred,                 */
3857/*                     1 == packet was successfully translated.             */
3858/* Parameters:  fin(I)   - pointer to packet information                    */
3859/*              passp(I) - pointer to filtering result flags                */
3860/*                                                                          */
3861/* Check to see if an outcoming packet should be changed.  ICMP packets are */
3862/* first checked to see if they match an existing entry (if an error),      */
3863/* otherwise a search of the current NAT table is made.  If neither results */
3864/* in a match then a search for a matching NAT rule is made.  Create a new  */
3865/* NAT entry if a we matched a NAT rule.  Lastly, actually change the       */
3866/* packet header(s) as required.                                            */
3867/* ------------------------------------------------------------------------ */
3868int fr_checknatout(fin, passp)
3869fr_info_t *fin;
3870u_32_t *passp;
3871{
3872	ipnat_t *np = NULL, *npnext;
3873	struct ifnet *ifp, *sifp;
3874	icmphdr_t *icmp = NULL;
3875	tcphdr_t *tcp = NULL;
3876	int rval, natfailed;
3877	u_int nflags = 0;
3878	u_32_t ipa, iph;
3879	int natadd = 1;
3880	frentry_t *fr;
3881	nat_t *nat;
3882	ipf_stack_t *ifs = fin->fin_ifs;
3883
3884	if (ifs->ifs_fr_nat_lock != 0)
3885		return 0;
3886	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
3887		return 0;
3888
3889	natfailed = 0;
3890	fr = fin->fin_fr;
3891	sifp = fin->fin_ifp;
3892	if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3893	    fr->fr_tifs[fin->fin_rev].fd_ifp &&
3894	    fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3895		fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3896	ifp = fin->fin_ifp;
3897
3898	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3899		switch (fin->fin_p)
3900		{
3901		case IPPROTO_TCP :
3902			nflags = IPN_TCP;
3903			break;
3904		case IPPROTO_UDP :
3905			nflags = IPN_UDP;
3906			break;
3907		case IPPROTO_ICMP :
3908			icmp = fin->fin_dp;
3909
3910			/*
3911			 * This is an incoming packet, so the destination is
3912			 * the icmp_id and the source port equals 0
3913			 */
3914			if (nat_icmpquerytype4(icmp->icmp_type))
3915				nflags = IPN_ICMPQUERY;
3916			break;
3917		default :
3918			break;
3919		}
3920
3921		if ((nflags & IPN_TCPUDP))
3922			tcp = fin->fin_dp;
3923	}
3924
3925	ipa = fin->fin_saddr;
3926
3927	READ_ENTER(&ifs->ifs_ipf_nat);
3928
3929	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3930	    (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3931		/*EMPTY*/;
3932	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3933		natadd = 0;
3934	else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3935				      fin->fin_src, fin->fin_dst))) {
3936		nflags = nat->nat_flags;
3937	} else {
3938		u_32_t hv, msk, nmsk;
3939
3940		/*
3941		 * There is no current entry in the nat table for this packet.
3942		 *
3943		 * If the packet is a fragment, but not the first fragment,
3944		 * then don't do anything.  Otherwise, if there is a matching
3945		 * nat rule, try to create a new nat entry.
3946		 */
3947		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
3948			goto nonatfrag;
3949
3950		msk = 0xffffffff;
3951		nmsk = ifs->ifs_nat_masks;
3952maskloop:
3953		iph = ipa & htonl(msk);
3954		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3955		for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) {
3956			npnext = np->in_mnext;
3957			if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3958				continue;
3959			if (np->in_v != fin->fin_v)
3960				continue;
3961			if (np->in_p && (np->in_p != fin->fin_p))
3962				continue;
3963			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3964				continue;
3965			if (np->in_flags & IPN_FILTER) {
3966				if (!nat_match(fin, np))
3967					continue;
3968			} else if ((ipa & np->in_inmsk) != np->in_inip)
3969				continue;
3970
3971			if ((fr != NULL) &&
3972			    !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3973				continue;
3974
3975			if (*np->in_plabel != '\0') {
3976				if (((np->in_flags & IPN_FILTER) == 0) &&
3977				    (np->in_dport != tcp->th_dport))
3978					continue;
3979				if (appr_ok(fin, tcp, np) == 0)
3980					continue;
3981			}
3982
3983			ATOMIC_INC32(np->in_use);
3984			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3985			WRITE_ENTER(&ifs->ifs_ipf_nat);
3986			nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3987			if (nat != NULL) {
3988				np->in_use--;
3989				np->in_hits++;
3990				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3991				break;
3992			}
3993			natfailed = -1;
3994			npnext = np->in_mnext;
3995			fr_ipnatderef(&np, ifs);
3996			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3997		}
3998		if ((np == NULL) && (nmsk != 0)) {
3999			while (nmsk) {
4000				msk <<= 1;
4001				if (nmsk & 0x80000000)
4002					break;
4003				nmsk <<= 1;
4004			}
4005			if (nmsk != 0) {
4006				nmsk <<= 1;
4007				goto maskloop;
4008			}
4009		}
4010	}
4011
4012nonatfrag:
4013	if (nat != NULL) {
4014		rval = fr_natout(fin, nat, natadd, nflags);
4015		if (rval == 1) {
4016			MUTEX_ENTER(&nat->nat_lock);
4017			nat_update(fin, nat, nat->nat_ptr);
4018			nat->nat_bytes[1] += fin->fin_plen;
4019			nat->nat_pkts[1]++;
4020			fin->fin_pktnum = nat->nat_pkts[1];
4021			MUTEX_EXIT(&nat->nat_lock);
4022		}
4023	} else
4024		rval = natfailed;
4025	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4026
4027	if (rval == -1) {
4028		if (passp != NULL)
4029			*passp = FR_BLOCK;
4030		fin->fin_flx |= FI_BADNAT;
4031	}
4032	fin->fin_ifp = sifp;
4033	return rval;
4034}
4035
4036/* ------------------------------------------------------------------------ */
4037/* Function:    fr_natout                                                   */
4038/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4039/*                     1 == packet was successfully translated.             */
4040/* Parameters:  fin(I)    - pointer to packet information                   */
4041/*              nat(I)    - pointer to NAT structure                        */
4042/*              natadd(I) - flag indicating if it is safe to add frag cache */
4043/*              nflags(I) - NAT flags set for this packet                   */
4044/*                                                                          */
4045/* Translate a packet coming "out" on an interface.                         */
4046/* ------------------------------------------------------------------------ */
4047int fr_natout(fin, nat, natadd, nflags)
4048fr_info_t *fin;
4049nat_t *nat;
4050int natadd;
4051u_32_t nflags;
4052{
4053	icmphdr_t *icmp;
4054	u_short *csump;
4055	u_32_t sumd;
4056	tcphdr_t *tcp;
4057	ipnat_t *np;
4058	int i;
4059	ipf_stack_t *ifs = fin->fin_ifs;
4060
4061	if (fin->fin_v == 6) {
4062#ifdef	USE_INET6
4063		return fr_nat6out(fin, nat, natadd, nflags);
4064#else
4065		return NULL;
4066#endif
4067	}
4068
4069#if defined(SOLARIS) && defined(_KERNEL)
4070	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4071#endif
4072
4073	tcp = NULL;
4074	icmp = NULL;
4075	csump = NULL;
4076	np = nat->nat_ptr;
4077
4078	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4079		(void) fr_nat_newfrag(fin, 0, nat);
4080
4081	/*
4082	 * Fix up checksums, not by recalculating them, but
4083	 * simply computing adjustments.
4084	 * This is only done for STREAMS based IP implementations where the
4085	 * checksum has already been calculated by IP.  In all other cases,
4086	 * IPFilter is called before the checksum needs calculating so there
4087	 * is no call to modify whatever is in the header now.
4088	 */
4089	ASSERT(fin->fin_m != NULL);
4090	if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
4091		if (nflags == IPN_ICMPERR) {
4092			u_32_t s1, s2;
4093
4094			s1 = LONG_SUM(ntohl(fin->fin_saddr));
4095			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
4096			CALC_SUMD(s1, s2, sumd);
4097
4098			fix_outcksum(&fin->fin_ip->ip_sum, sumd);
4099		}
4100#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4101    defined(linux) || defined(BRIDGE_IPF)
4102		else {
4103			/*
4104			 * Strictly speaking, this isn't necessary on BSD
4105			 * kernels because they do checksum calculation after
4106			 * this code has run BUT if ipfilter is being used
4107			 * to do NAT as a bridge, that code doesn't exist.
4108			 */
4109			if (nat->nat_dir == NAT_OUTBOUND)
4110				fix_outcksum(&fin->fin_ip->ip_sum,
4111					    nat->nat_ipsumd);
4112			else
4113				fix_incksum(&fin->fin_ip->ip_sum,
4114				 	   nat->nat_ipsumd);
4115		}
4116#endif
4117	}
4118
4119	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4120		if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4121			tcp = fin->fin_dp;
4122
4123			tcp->th_sport = nat->nat_outport;
4124			fin->fin_data[0] = ntohs(nat->nat_outport);
4125		}
4126
4127		if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4128			icmp = fin->fin_dp;
4129			icmp->icmp_id = nat->nat_outport;
4130		}
4131
4132		csump = nat_proto(fin, nat, nflags);
4133	}
4134
4135	fin->fin_ip->ip_src = nat->nat_outip;
4136
4137	/*
4138	 * The above comments do not hold for layer 4 (or higher) checksums...
4139	 */
4140	if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
4141		if (nflags & IPN_TCPUDP &&
4142	   	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
4143			sumd = nat->nat_sumd[1];
4144		else
4145			sumd = nat->nat_sumd[0];
4146
4147		if (nat->nat_dir == NAT_OUTBOUND)
4148			fix_outcksum(csump, sumd);
4149		else
4150			fix_incksum(csump, sumd);
4151	}
4152#ifdef	IPFILTER_SYNC
4153	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4154#endif
4155	/* ------------------------------------------------------------- */
4156	/* A few quick notes:						 */
4157	/*	Following are test conditions prior to calling the 	 */
4158	/*	appr_check routine.					 */
4159	/*								 */
4160	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4161	/*	with a redirect rule, we attempt to match the packet's	 */
4162	/*	source port against in_dport, otherwise	we'd compare the */
4163	/*	packet's destination.			 		 */
4164	/* ------------------------------------------------------------- */
4165	if ((np != NULL) && (np->in_apr != NULL)) {
4166		i = appr_check(fin, nat);
4167		if (i == 0)
4168			i = 1;
4169	} else
4170		i = 1;
4171	ifs->ifs_nat_stats.ns_mapped[1]++;
4172	fin->fin_flx |= FI_NATED;
4173	return i;
4174}
4175
4176
4177/* ------------------------------------------------------------------------ */
4178/* Function:    fr_checknatin                                               */
4179/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4180/*                     0 == no packet translation occurred,                 */
4181/*                     1 == packet was successfully translated.             */
4182/* Parameters:  fin(I)   - pointer to packet information                    */
4183/*              passp(I) - pointer to filtering result flags                */
4184/*                                                                          */
4185/* Check to see if an incoming packet should be changed.  ICMP packets are  */
4186/* first checked to see if they match an existing entry (if an error),      */
4187/* otherwise a search of the current NAT table is made.  If neither results */
4188/* in a match then a search for a matching NAT rule is made.  Create a new  */
/* NAT entry if we matched a NAT rule.  Lastly, actually change the         */
4190/* packet header(s) as required.                                            */
4191/* ------------------------------------------------------------------------ */
4192int fr_checknatin(fin, passp)
4193fr_info_t *fin;
4194u_32_t *passp;
4195{
4196	u_int nflags, natadd;
4197	ipnat_t *np, *npnext;
4198	int rval, natfailed;
4199	struct ifnet *ifp;
4200	struct in_addr in;
4201	icmphdr_t *icmp;
4202	tcphdr_t *tcp;
4203	u_short dport;
4204	nat_t *nat;
4205	u_32_t iph;
4206	ipf_stack_t *ifs = fin->fin_ifs;
4207
4208	if (ifs->ifs_fr_nat_lock != 0)
4209		return 0;
4210	if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
4211		return 0;
4212
4213	tcp = NULL;
4214	icmp = NULL;
4215	dport = 0;
4216	natadd = 1;
4217	nflags = 0;
4218	natfailed = 0;
4219	ifp = fin->fin_ifp;
4220
4221	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4222		switch (fin->fin_p)
4223		{
4224		case IPPROTO_TCP :
4225			nflags = IPN_TCP;
4226			break;
4227		case IPPROTO_UDP :
4228			nflags = IPN_UDP;
4229			break;
4230		case IPPROTO_ICMP :
4231			icmp = fin->fin_dp;
4232
4233			/*
4234			 * This is an incoming packet, so the destination is
4235			 * the icmp_id and the source port equals 0
4236			 */
4237			if (nat_icmpquerytype4(icmp->icmp_type)) {
4238				nflags = IPN_ICMPQUERY;
4239				dport = icmp->icmp_id;
4240			} break;
4241		default :
4242			break;
4243		}
4244
4245		if ((nflags & IPN_TCPUDP)) {
4246			tcp = fin->fin_dp;
4247			dport = tcp->th_dport;
4248		}
4249	}
4250
4251	in = fin->fin_dst;
4252
4253	READ_ENTER(&ifs->ifs_ipf_nat);
4254
4255	if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4256	    (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4257		/*EMPTY*/;
4258	else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4259		natadd = 0;
4260	else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4261				     fin->fin_src, in))) {
4262		nflags = nat->nat_flags;
4263	} else {
4264		u_32_t hv, msk, rmsk;
4265
4266		/*
4267		 * There is no current entry in the nat table for this packet.
4268		 *
4269		 * If the packet is a fragment, but not the first fragment,
4270		 * then don't do anything.  Otherwise, if there is a matching
4271		 * nat rule, try to create a new nat entry.
4272		 */
4273		if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
4274			goto nonatfrag;
4275
4276		rmsk = ifs->ifs_rdr_masks;
4277		msk = 0xffffffff;
4278maskloop:
4279		iph = in.s_addr & htonl(msk);
4280		hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4281		for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) {
4282			npnext = np->in_rnext;
4283			if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4284				continue;
4285			if (np->in_v != fin->fin_v)
4286				continue;
4287			if (np->in_p && (np->in_p != fin->fin_p))
4288				continue;
4289			if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4290				continue;
4291			if (np->in_flags & IPN_FILTER) {
4292				if (!nat_match(fin, np))
4293					continue;
4294			} else {
4295				if ((in.s_addr & np->in_outmsk) != np->in_outip)
4296					continue;
4297				if (np->in_pmin &&
4298				    ((ntohs(np->in_pmax) < ntohs(dport)) ||
4299				     (ntohs(dport) < ntohs(np->in_pmin))))
4300					continue;
4301			}
4302
4303			if (*np->in_plabel != '\0') {
4304				if (!appr_ok(fin, tcp, np)) {
4305					continue;
4306				}
4307			}
4308
4309			ATOMIC_INC32(np->in_use);
4310			RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4311			WRITE_ENTER(&ifs->ifs_ipf_nat);
4312			nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4313			if (nat != NULL) {
4314				np->in_use--;
4315				np->in_hits++;
4316				MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4317				break;
4318			}
4319			natfailed = -1;
4320			npnext = np->in_rnext;
4321			fr_ipnatderef(&np, ifs);
4322			MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4323		}
4324
4325		if ((np == NULL) && (rmsk != 0)) {
4326			while (rmsk) {
4327				msk <<= 1;
4328				if (rmsk & 0x80000000)
4329					break;
4330				rmsk <<= 1;
4331			}
4332			if (rmsk != 0) {
4333				rmsk <<= 1;
4334				goto maskloop;
4335			}
4336		}
4337	}
4338
4339nonatfrag:
4340	if (nat != NULL) {
4341		rval = fr_natin(fin, nat, natadd, nflags);
4342		if (rval == 1) {
4343			MUTEX_ENTER(&nat->nat_lock);
4344			nat_update(fin, nat, nat->nat_ptr);
4345			nat->nat_bytes[0] += fin->fin_plen;
4346			nat->nat_pkts[0]++;
4347			fin->fin_pktnum = nat->nat_pkts[0];
4348			MUTEX_EXIT(&nat->nat_lock);
4349		}
4350	} else
4351		rval = natfailed;
4352	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4353
4354	if (rval == -1) {
4355		if (passp != NULL)
4356			*passp = FR_BLOCK;
4357		fin->fin_flx |= FI_BADNAT;
4358	}
4359	return rval;
4360}
4361
4362
4363/* ------------------------------------------------------------------------ */
4364/* Function:    fr_natin                                                    */
4365/* Returns:     int - -1 == packet failed NAT checks so block it,           */
4366/*                     1 == packet was successfully translated.             */
4367/* Parameters:  fin(I)    - pointer to packet information                   */
4368/*              nat(I)    - pointer to NAT structure                        */
4369/*              natadd(I) - flag indicating if it is safe to add frag cache */
4370/*              nflags(I) - NAT flags set for this packet                   */
4371/* Locks Held:  ipf_nat (READ)                                              */
4372/*                                                                          */
4373/* Translate a packet coming "in" on an interface.                          */
4374/* ------------------------------------------------------------------------ */
4375int fr_natin(fin, nat, natadd, nflags)
4376fr_info_t *fin;
4377nat_t *nat;
4378int natadd;
4379u_32_t nflags;
4380{
4381	icmphdr_t *icmp;
4382	u_short *csump;
4383	tcphdr_t *tcp;
4384	ipnat_t *np;
4385	int i;
4386	ipf_stack_t *ifs = fin->fin_ifs;
4387
4388	if (fin->fin_v == 6) {
4389#ifdef	USE_INET6
4390		return fr_nat6in(fin, nat, natadd, nflags);
4391#else
4392		return NULL;
4393#endif
4394	}
4395
4396#if defined(SOLARIS) && defined(_KERNEL)
4397	net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4398#endif
4399
4400	tcp = NULL;
4401	csump = NULL;
4402	np = nat->nat_ptr;
4403	fin->fin_fr = nat->nat_fr;
4404
4405	if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4406		(void) fr_nat_newfrag(fin, 0, nat);
4407
4408	if (np != NULL) {
4409
4410	/* ------------------------------------------------------------- */
4411	/* A few quick notes:						 */
4412	/*	Following are test conditions prior to calling the 	 */
4413	/*	appr_check routine.					 */
4414	/*								 */
4415	/* 	A NULL tcp indicates a non TCP/UDP packet.  When dealing */
4416	/*	with a map rule, we attempt to match the packet's	 */
4417	/*	source port against in_dport, otherwise	we'd compare the */
4418	/*	packet's destination.			 		 */
4419	/* ------------------------------------------------------------- */
4420		if (np->in_apr != NULL) {
4421			i = appr_check(fin, nat);
4422			if (i == -1) {
4423				return -1;
4424			}
4425		}
4426	}
4427
4428#ifdef	IPFILTER_SYNC
4429	ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4430#endif
4431
4432	fin->fin_ip->ip_dst = nat->nat_inip;
4433	fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4434	if (nflags & IPN_TCPUDP)
4435		tcp = fin->fin_dp;
4436
4437	/*
4438	 * Fix up checksums, not by recalculating them, but
4439	 * simply computing adjustments.
4440	 * Why only do this for some platforms on inbound packets ?
4441	 * Because for those that it is done, IP processing is yet to happen
4442	 * and so the IPv4 header checksum has not yet been evaluated.
4443	 * Perhaps it should always be done for the benefit of things like
4444	 * fast forwarding (so that it doesn't need to be recomputed) but with
4445	 * header checksum offloading, perhaps it is a moot point.
4446	 */
4447#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4448     defined(__osf__) || defined(linux)
4449	if (nat->nat_dir == NAT_OUTBOUND)
4450		fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4451	else
4452		fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4453#endif
4454
4455	if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4456		if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4457			tcp->th_dport = nat->nat_inport;
4458			fin->fin_data[1] = ntohs(nat->nat_inport);
4459		}
4460
4461
4462		if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4463			icmp = fin->fin_dp;
4464
4465			icmp->icmp_id = nat->nat_inport;
4466		}
4467
4468		csump = nat_proto(fin, nat, nflags);
4469	}
4470
4471	/*
4472	 * In case they are being forwarded, inbound packets always need to have
4473	 * their checksum adjusted even if hardware checksum validation said OK.
4474	 */
4475	if (csump != NULL) {
4476		if (nat->nat_dir == NAT_OUTBOUND)
4477			fix_incksum(csump, nat->nat_sumd[0]);
4478		else
4479			fix_outcksum(csump, nat->nat_sumd[0]);
4480	}
4481
4482#if defined(SOLARIS) && defined(_KERNEL)
4483	if (nflags & IPN_TCPUDP &&
4484	    NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4485		/*
4486		 * Need to adjust the partial checksum result stored in
4487		 * db_cksum16, which will be used for validation in IP.
4488		 * See IP_CKSUM_RECV().
4489		 * Adjustment data should be the inverse of the IP address
4490		 * changes, because db_cksum16 is supposed to be the complement
4491		 * of the pesudo header.
4492		 */
4493		csump = &fin->fin_m->b_datap->db_cksum16;
4494		if (nat->nat_dir == NAT_OUTBOUND)
4495			fix_outcksum(csump, nat->nat_sumd[1]);
4496		else
4497			fix_incksum(csump, nat->nat_sumd[1]);
4498	}
4499#endif
4500
4501	ifs->ifs_nat_stats.ns_mapped[0]++;
4502	fin->fin_flx |= FI_NATED;
4503	if (np != NULL && np->in_tag.ipt_num[0] != 0)
4504		fin->fin_nattag = &np->in_tag;
4505	return 1;
4506}
4507
4508
4509/* ------------------------------------------------------------------------ */
4510/* Function:    nat_proto                                                   */
4511/* Returns:     u_short* - pointer to transport header checksum to update,  */
4512/*                         NULL if the transport protocol is not recognised */
4513/*                         as needing a checksum update.                    */
4514/* Parameters:  fin(I)    - pointer to packet information                   */
4515/*              nat(I)    - pointer to NAT structure                        */
4516/*              nflags(I) - NAT flags set for this packet                   */
4517/*                                                                          */
4518/* Return the pointer to the checksum field for each protocol so understood.*/
4519/* If support for making other changes to a protocol header is required,    */
4520/* that is not strictly 'address' translation, such as clamping the MSS in  */
4521/* TCP down to a specific value, then do it from here.                      */
4522/* ------------------------------------------------------------------------ */
4523u_short *nat_proto(fin, nat, nflags)
4524fr_info_t *fin;
4525nat_t *nat;
4526u_int nflags;
4527{
4528	icmphdr_t *icmp;
4529	struct icmp6_hdr *icmp6;
4530	u_short *csump;
4531	tcphdr_t *tcp;
4532	udphdr_t *udp;
4533
4534	csump = NULL;
4535	if (fin->fin_out == 0) {
4536		fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4537	} else {
4538		fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4539	}
4540
4541	switch (fin->fin_p)
4542	{
4543	case IPPROTO_TCP :
4544		tcp = fin->fin_dp;
4545
4546		csump = &tcp->th_sum;
4547
4548		/*
4549		 * Do a MSS CLAMPING on a SYN packet,
4550		 * only deal IPv4 for now.
4551		 */
4552		if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4553			nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4554
4555		break;
4556
4557	case IPPROTO_UDP :
4558		udp = fin->fin_dp;
4559
4560		if (udp->uh_sum)
4561			csump = &udp->uh_sum;
4562		break;
4563
4564	case IPPROTO_ICMP :
4565		icmp = fin->fin_dp;
4566
4567		if ((nflags & IPN_ICMPQUERY) != 0) {
4568			if (icmp->icmp_cksum != 0)
4569				csump = &icmp->icmp_cksum;
4570		}
4571		break;
4572
4573	case IPPROTO_ICMPV6 :
4574		icmp6 = fin->fin_dp;
4575
4576		if ((nflags & IPN_ICMPQUERY) != 0) {
4577			if (icmp6->icmp6_cksum != 0)
4578				csump = &icmp6->icmp6_cksum;
4579		}
4580		break;
4581	}
4582	return csump;
4583}
4584
4585
4586/* ------------------------------------------------------------------------ */
4587/* Function:    fr_natunload                                                */
4588/* Returns:     Nil                                                         */
/* Parameters:  ifs(I) - ipf stack instance                                 */
4590/*                                                                          */
4591/* Free all memory used by NAT structures allocated at runtime.             */
4592/* ------------------------------------------------------------------------ */
4593void fr_natunload(ifs)
4594ipf_stack_t *ifs;
4595{
4596	ipftq_t *ifq, *ifqnext;
4597
4598	(void) nat_clearlist(ifs);
4599	(void) nat_flushtable(FLUSH_TABLE_ALL, ifs);
4600
4601	/*
4602	 * Proxy timeout queues are not cleaned here because although they
4603	 * exist on the NAT list, appr_unload is called after fr_natunload
4604	 * and the proxies actually are responsible for them being created.
4605	 * Should the proxy timeouts have their own list?  There's no real
4606	 * justification as this is the only complication.
4607	 */
4608	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4609		ifqnext = ifq->ifq_next;
4610		if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4611		    (fr_deletetimeoutqueue(ifq) == 0))
4612			fr_freetimeoutqueue(ifq, ifs);
4613	}
4614
4615	if (ifs->ifs_nat_table[0] != NULL) {
4616		KFREES(ifs->ifs_nat_table[0],
4617		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4618		ifs->ifs_nat_table[0] = NULL;
4619	}
4620	if (ifs->ifs_nat_table[1] != NULL) {
4621		KFREES(ifs->ifs_nat_table[1],
4622		       sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4623		ifs->ifs_nat_table[1] = NULL;
4624	}
4625	if (ifs->ifs_nat_rules != NULL) {
4626		KFREES(ifs->ifs_nat_rules,
4627		       sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4628		ifs->ifs_nat_rules = NULL;
4629	}
4630	if (ifs->ifs_rdr_rules != NULL) {
4631		KFREES(ifs->ifs_rdr_rules,
4632		       sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4633		ifs->ifs_rdr_rules = NULL;
4634	}
4635	if (ifs->ifs_maptable != NULL) {
4636		KFREES(ifs->ifs_maptable,
4637		       sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4638		ifs->ifs_maptable = NULL;
4639	}
4640	if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4641		KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4642		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4643		ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4644	}
4645	if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4646		KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4647		       sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4648		ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4649	}
4650
4651	if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4652		ifs->ifs_fr_nat_maxbucket = 0;
4653
4654	if (ifs->ifs_fr_nat_init == 1) {
4655		ifs->ifs_fr_nat_init = 0;
4656		fr_sttab_destroy(ifs->ifs_nat_tqb);
4657
4658		RW_DESTROY(&ifs->ifs_ipf_natfrag);
4659		RW_DESTROY(&ifs->ifs_ipf_nat);
4660
4661		MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4662		MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4663
4664		MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4665		MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4666		MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4667	}
4668}
4669
4670
4671/* ------------------------------------------------------------------------ */
4672/* Function:    fr_natexpire                                                */
4673/* Returns:     Nil                                                         */
4674/* Parameters:  ifs - ipf stack instance                                    */
4675/*                                                                          */
4676/* Check all of the timeout queues for entries at the top which need to be  */
4677/* expired.                                                                 */
4678/* ------------------------------------------------------------------------ */
4679void fr_natexpire(ifs)
4680ipf_stack_t *ifs;
4681{
4682	ipftq_t *ifq, *ifqnext;
4683	ipftqent_t *tqe, *tqn;
4684	int i;
4685	SPL_INT(s);
4686
4687	SPL_NET(s);
4688	WRITE_ENTER(&ifs->ifs_ipf_nat);
4689	for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4690		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4691			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4692				break;
4693			tqn = tqe->tqe_next;
4694			(void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4695		}
4696	}
4697
4698	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4699		ifqnext = ifq->ifq_next;
4700
4701		for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4702			if (tqe->tqe_die > ifs->ifs_fr_ticks)
4703				break;
4704			tqn = tqe->tqe_next;
4705			(void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4706		}
4707	}
4708
4709	for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4710		ifqnext = ifq->ifq_next;
4711
4712		if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4713		    (ifq->ifq_ref == 0)) {
4714			fr_freetimeoutqueue(ifq, ifs);
4715		}
4716	}
4717
4718	if (ifs->ifs_nat_doflush != 0) {
4719		(void) nat_flushtable(FLUSH_TABLE_EXTRA, ifs);
4720		ifs->ifs_nat_doflush = 0;
4721	}
4722
4723	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4724	SPL_X(s);
4725}
4726
4727
4728/* ------------------------------------------------------------------------ */
4729/* Function:    fr_nataddrsync                                              */
4730/* Returns:     Nil                                                         */
4731/* Parameters:  ifp(I) -  pointer to network interface                      */
4732/*              addr(I) - pointer to new network address                    */
4733/*                                                                          */
4734/* Walk through all of the currently active NAT sessions, looking for those */
4735/* which need to have their translated address updated (where the interface */
4736/* matches the one passed in) and change it, recalculating the checksum sum */
4737/* difference too.                                                          */
4738/* ------------------------------------------------------------------------ */
4739void fr_nataddrsync(v, ifp, addr, ifs)
4740int v;
4741void *ifp;
4742void *addr;
4743ipf_stack_t *ifs;
4744{
4745	u_32_t sum1, sum2, sumd;
4746	nat_t *nat;
4747	ipnat_t *np;
4748	SPL_INT(s);
4749
4750	sum1 = 0;
4751	sum2 = 0;
4752	if (ifs->ifs_fr_running <= 0)
4753		return;
4754
4755	SPL_NET(s);
4756	WRITE_ENTER(&ifs->ifs_ipf_nat);
4757
4758	if (ifs->ifs_fr_running <= 0) {
4759		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4760		return;
4761	}
4762
4763	/*
4764	 * Change IP addresses for NAT sessions for any protocol except TCP
4765	 * since it will break the TCP connection anyway.  The only rules
4766	 * which will get changed are those which are "map ... -> 0/32",
4767	 * where the rule specifies the address is taken from the interface.
4768	 */
4769	for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4770		if (addr != NULL) {
4771			if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4772			    ((nat->nat_flags & IPN_TCP) != 0))
4773				continue;
4774			if ((np = nat->nat_ptr) == NULL)
4775				continue;
4776			if (v == 4 && np->in_v == 4) {
4777				if (np->in_nip || np->in_outmsk != 0xffffffff)
4778					continue;
4779				/*
4780				 * Change the map-to address to be the same as
4781				 * the new one.
4782				 */
4783				sum1 = nat->nat_outip.s_addr;
4784				nat->nat_outip = *(struct in_addr *)addr;
4785				sum2 = nat->nat_outip.s_addr;
4786			} else if (v == 6 && np->in_v == 6) {
4787				if (!IP6_ISZERO(&np->in_next6.in6) ||
4788				    !IP6_ISONES(&np->in_out[1].in6))
4789					continue;
4790				/*
4791				 * Change the map-to address to be the same as
4792				 * the new one.
4793				 */
4794				nat->nat_outip6.in6 = *(struct in6_addr *)addr;
4795			} else
4796				continue;
4797
4798		} else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4799		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) {
4800			if (np->in_v == 4 && (v == 4 || v == 0)) {
4801				struct in_addr in;
4802				if (np->in_outmsk != 0xffffffff || np->in_nip)
4803					continue;
4804				/*
4805				 * Change the map-to address to be the same as
4806				 * the new one.
4807				 */
4808				sum1 = nat->nat_outip.s_addr;
4809				if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4810					       &in, NULL, ifs) != -1)
4811					nat->nat_outip = in;
4812				sum2 = nat->nat_outip.s_addr;
4813			} else if (np->in_v == 6 && (v == 6 || v == 0)) {
4814				struct in6_addr in6;
4815				if (!IP6_ISZERO(&np->in_next6.in6) ||
4816				    !IP6_ISONES(&np->in_out[1].in6))
4817					continue;
4818				/*
4819				 * Change the map-to address to be the same as
4820				 * the new one.
4821				 */
4822				if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0],
4823					       (void *)&in6, NULL, ifs) != -1)
4824					nat->nat_outip6.in6 = in6;
4825			} else
4826				continue;
4827		} else {
4828			continue;
4829		}
4830
4831		if (sum1 == sum2)
4832			continue;
4833		/*
4834		 * Readjust the checksum adjustment to take into
4835		 * account the new IP#.
4836		 */
4837		CALC_SUMD(sum1, sum2, sumd);
4838		/* XXX - dont change for TCP when solaris does
4839		 * hardware checksumming.
4840		 */
4841		sumd += nat->nat_sumd[0];
4842		nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4843		nat->nat_sumd[1] = nat->nat_sumd[0];
4844	}
4845
4846	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4847	SPL_X(s);
4848}
4849
4850
4851/* ------------------------------------------------------------------------ */
4852/* Function:    fr_natifpsync                                               */
4853/* Returns:     Nil                                                         */
4854/* Parameters:  action(I) - how we are syncing                              */
4855/*              ifp(I)    - pointer to network interface                    */
4856/*              name(I)   - name of interface to sync to                    */
4857/*                                                                          */
4858/* This function is used to resync the mapping of interface names and their */
4859/* respective 'pointers'.  For "action == IPFSYNC_RESYNC", resync all       */
4860/* interfaces by doing a new lookup of name to 'pointer'.  For "action ==   */
4861/* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with      */
4862/* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which    */
4863/* there is no longer any interface associated with it.                     */
4864/* ------------------------------------------------------------------------ */
4865void fr_natifpsync(action, v, ifp, name, ifs)
4866int action, v;
4867void *ifp;
4868char *name;
4869ipf_stack_t *ifs;
4870{
4871#if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4872	int s;
4873#endif
4874	nat_t *nat;
4875	ipnat_t *n;
4876	int nv;
4877
4878	if (ifs->ifs_fr_running <= 0)
4879		return;
4880
4881	SPL_NET(s);
4882	WRITE_ENTER(&ifs->ifs_ipf_nat);
4883
4884	if (ifs->ifs_fr_running <= 0) {
4885		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4886		return;
4887	}
4888
4889	switch (action)
4890	{
4891	case IPFSYNC_RESYNC :
4892		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4893			nv = (v == 0) ? nat->nat_v : v;
4894			if (nat->nat_v != nv)
4895				continue;
4896			if ((ifp == nat->nat_ifps[0]) ||
4897			    (nat->nat_ifps[0] == (void *)-1)) {
4898				nat->nat_ifps[0] =
4899				    fr_resolvenic(nat->nat_ifnames[0], nv, ifs);
4900			}
4901
4902			if ((ifp == nat->nat_ifps[1]) ||
4903			    (nat->nat_ifps[1] == (void *)-1)) {
4904				nat->nat_ifps[1] =
4905				    fr_resolvenic(nat->nat_ifnames[1], nv, ifs);
4906			}
4907		}
4908
4909		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4910			nv = (v == 0) ? (int)n->in_v : v;
4911			if ((int)n->in_v != nv)
4912				continue;
4913			if (n->in_ifps[0] == ifp ||
4914			    n->in_ifps[0] == (void *)-1) {
4915				n->in_ifps[0] =
4916				    fr_resolvenic(n->in_ifnames[0], nv, ifs);
4917			}
4918			if (n->in_ifps[1] == ifp ||
4919			    n->in_ifps[1] == (void *)-1) {
4920				n->in_ifps[1] =
4921				    fr_resolvenic(n->in_ifnames[1], nv, ifs);
4922			}
4923		}
4924		break;
4925	case IPFSYNC_NEWIFP :
4926		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4927			if (nat->nat_v != v)
4928				continue;
4929			if (!strncmp(name, nat->nat_ifnames[0],
4930				     sizeof(nat->nat_ifnames[0])))
4931				nat->nat_ifps[0] = ifp;
4932			if (!strncmp(name, nat->nat_ifnames[1],
4933				     sizeof(nat->nat_ifnames[1])))
4934				nat->nat_ifps[1] = ifp;
4935		}
4936		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4937			if ((int)n->in_v != v)
4938				continue;
4939			if (!strncmp(name, n->in_ifnames[0],
4940				     sizeof(n->in_ifnames[0])))
4941				n->in_ifps[0] = ifp;
4942			if (!strncmp(name, n->in_ifnames[1],
4943				     sizeof(n->in_ifnames[1])))
4944				n->in_ifps[1] = ifp;
4945		}
4946		break;
4947	case IPFSYNC_OLDIFP :
4948		for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4949			if (nat->nat_v != v)
4950				continue;
4951			if (ifp == nat->nat_ifps[0])
4952				nat->nat_ifps[0] = (void *)-1;
4953			if (ifp == nat->nat_ifps[1])
4954				nat->nat_ifps[1] = (void *)-1;
4955		}
4956		for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4957			if ((int)n->in_v != v)
4958				continue;
4959			if (n->in_ifps[0] == ifp)
4960				n->in_ifps[0] = (void *)-1;
4961			if (n->in_ifps[1] == ifp)
4962				n->in_ifps[1] = (void *)-1;
4963		}
4964		break;
4965	}
4966	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4967	SPL_X(s);
4968}
4969
4970#if SOLARIS2 >= 10
4971/* ------------------------------------------------------------------------ */
4972/* Function:	fr_natifindexsync					    */
4973/* Returns:	void							    */
4974/* Parameters:	ifp	  - interface, which is being sync'd		    */
4975/*		newifp	  - new ifindex value for interface		    */
4976/*              ifs	  - IPF's stack					    */
4977/*                                                                          */
4978/* Write Locks: assumes ipf_mutex is locked				    */
4979/*                                                                          */
4980/* Updates all interface index references in NAT rules and NAT entries.	    */
4981/* the index, which is about to be updated must match ifp value.	    */
4982/* ------------------------------------------------------------------------ */
4983void fr_natifindexsync(ifp, newifp, ifs)
4984void *ifp;
4985void *newifp;
4986ipf_stack_t *ifs;
4987{
4988	nat_t *nat;
4989	ipnat_t *n;
4990
4991	WRITE_ENTER(&ifs->ifs_ipf_nat);
4992
4993	for (nat = ifs->ifs_nat_instances; nat != NULL; nat = nat->nat_next) {
4994		if (ifp == nat->nat_ifps[0])
4995			nat->nat_ifps[0] = newifp;
4996
4997		if (ifp == nat->nat_ifps[1])
4998			nat->nat_ifps[1] = newifp;
4999	}
5000
5001	for (n = ifs->ifs_nat_list; n != NULL; n = n->in_next) {
5002		if (ifp == n->in_ifps[0])
5003			n->in_ifps[0] = newifp;
5004
5005		if (ifp == n->in_ifps[1])
5006			n->in_ifps[1] = newifp;
5007	}
5008
5009	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5010}
5011#endif
5012
5013/* ------------------------------------------------------------------------ */
5014/* Function:    nat_icmpquerytype4                                          */
5015/* Returns:     int - 1 == success, 0 == failure                            */
5016/* Parameters:  icmptype(I) - ICMP type number                              */
5017/*                                                                          */
5018/* Tests to see if the ICMP type number passed is a query/response type or  */
5019/* not.                                                                     */
5020/* ------------------------------------------------------------------------ */
5021static INLINE int nat_icmpquerytype4(icmptype)
5022int icmptype;
5023{
5024
5025	/*
5026	 * For the ICMP query NAT code, it is essential that both the query
5027	 * and the reply match on the NAT rule. Because the NAT structure
5028	 * does not keep track of the icmptype, and a single NAT structure
5029	 * is used for all icmp types with the same src, dest and id, we
5030	 * simply define the replies as queries as well. The funny thing is,
5031	 * altough it seems silly to call a reply a query, this is exactly
5032	 * as it is defined in the IPv4 specification
5033	 */
5034
5035	switch (icmptype)
5036	{
5037
5038	case ICMP_ECHOREPLY:
5039	case ICMP_ECHO:
5040	/* route aedvertisement/solliciation is currently unsupported: */
5041	/* it would require rewriting the ICMP data section            */
5042	case ICMP_TSTAMP:
5043	case ICMP_TSTAMPREPLY:
5044	case ICMP_IREQ:
5045	case ICMP_IREQREPLY:
5046	case ICMP_MASKREQ:
5047	case ICMP_MASKREPLY:
5048		return 1;
5049	default:
5050		return 0;
5051	}
5052}
5053
5054
5055/* ------------------------------------------------------------------------ */
5056/* Function:    nat_log                                                     */
5057/* Returns:     Nil                                                         */
5058/* Parameters:  nat(I)  - pointer to NAT structure                          */
5059/*              type(I) - type of log entry to create                       */
5060/*                                                                          */
5061/* Creates a NAT log entry.                                                 */
5062/* ------------------------------------------------------------------------ */
5063void nat_log(nat, type, ifs)
5064struct nat *nat;
5065u_int type;
5066ipf_stack_t *ifs;
5067{
5068#ifdef	IPFILTER_LOG
5069# ifndef LARGE_NAT
5070	struct ipnat *np;
5071	int rulen;
5072# endif
5073	struct natlog natl;
5074	void *items[1];
5075	size_t sizes[1];
5076	int types[1];
5077
5078	natl.nlg_inip = nat->nat_inip6;
5079	natl.nlg_outip = nat->nat_outip6;
5080	natl.nlg_origip = nat->nat_oip6;
5081	natl.nlg_bytes[0] = nat->nat_bytes[0];
5082	natl.nlg_bytes[1] = nat->nat_bytes[1];
5083	natl.nlg_pkts[0] = nat->nat_pkts[0];
5084	natl.nlg_pkts[1] = nat->nat_pkts[1];
5085	natl.nlg_origport = nat->nat_oport;
5086	natl.nlg_inport = nat->nat_inport;
5087	natl.nlg_outport = nat->nat_outport;
5088	natl.nlg_p = nat->nat_p;
5089	natl.nlg_type = type;
5090	natl.nlg_rule = -1;
5091	natl.nlg_v = nat->nat_v;
5092# ifndef LARGE_NAT
5093	if (nat->nat_ptr != NULL) {
5094		for (rulen = 0, np = ifs->ifs_nat_list; np;
5095		     np = np->in_next, rulen++)
5096			if (np == nat->nat_ptr) {
5097				natl.nlg_rule = rulen;
5098				break;
5099			}
5100	}
5101# endif
5102	items[0] = &natl;
5103	sizes[0] = sizeof(natl);
5104	types[0] = 0;
5105
5106	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
5107#endif
5108}
5109
5110
5111#if defined(__OpenBSD__)
5112/* ------------------------------------------------------------------------ */
5113/* Function:    nat_ifdetach                                                */
5114/* Returns:     Nil                                                         */
5115/* Parameters:  ifp(I) - pointer to network interface                       */
5116/*                                                                          */
5117/* Compatibility interface for OpenBSD to trigger the correct updating of   */
5118/* interface references within IPFilter.                                    */
5119/* ------------------------------------------------------------------------ */
5120void nat_ifdetach(ifp, ifs)
5121void *ifp;
5122ipf_stack_t *ifs;
5123{
5124	frsync(ifp, ifs);
5125	return;
5126}
5127#endif
5128
5129
5130/* ------------------------------------------------------------------------ */
5131/* Function:    fr_ipnatderef                                               */
5132/* Returns:     Nil                                                         */
5133/* Parameters:  inp(I) - pointer to pointer to NAT rule                     */
5134/* Write Locks: ipf_nat                                                     */
5135/*                                                                          */
5136/* ------------------------------------------------------------------------ */
5137void fr_ipnatderef(inp, ifs)
5138ipnat_t **inp;
5139ipf_stack_t *ifs;
5140{
5141	ipnat_t *in;
5142
5143	in = *inp;
5144	*inp = NULL;
5145	in->in_use--;
5146	if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
5147		if (in->in_apr)
5148			appr_free(in->in_apr);
5149		KFREE(in);
5150		ifs->ifs_nat_stats.ns_rules--;
5151#ifdef notdef
5152#if SOLARIS
5153		if (ifs->ifs_nat_stats.ns_rules == 0)
5154			ifs->ifs_pfil_delayed_copy = 1;
5155#endif
5156#endif
5157	}
5158}
5159
5160
5161/* ------------------------------------------------------------------------ */
5162/* Function:    fr_natderef                                                 */
5163/* Returns:     Nil                                                         */
5164/* Parameters:  natp - pointer to pointer to NAT table entry                */
5165/*              ifs  - ipf stack instance                                   */
5166/*                                                                          */
5167/* Decrement the reference counter for this NAT table entry and free it if  */
5168/* there are no more things using it.                                       */
5169/*                                                                          */
5170/* IF nat_ref == 1 when this function is called, then we have an orphan nat */
5171/* structure *because* it only gets called on paths _after_ nat_ref has been*/
5172/* incremented.  If nat_ref == 1 then we shouldn't decrement it here        */
5173/* because nat_delete() will do that and send nat_ref to -1.                */
5174/*                                                                          */
5175/* Holding the lock on nat_lock is required to serialise nat_delete() being */
5176/* called from a NAT flush ioctl with a deref happening because of a packet.*/
5177/* ------------------------------------------------------------------------ */
5178void fr_natderef(natp, ifs)
5179nat_t **natp;
5180ipf_stack_t *ifs;
5181{
5182	nat_t *nat;
5183
5184	nat = *natp;
5185	*natp = NULL;
5186
5187	MUTEX_ENTER(&nat->nat_lock);
5188	if (nat->nat_ref > 1) {
5189		nat->nat_ref--;
5190		MUTEX_EXIT(&nat->nat_lock);
5191		return;
5192	}
5193	MUTEX_EXIT(&nat->nat_lock);
5194
5195	WRITE_ENTER(&ifs->ifs_ipf_nat);
5196	(void) nat_delete(nat, NL_EXPIRE, ifs);
5197	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5198}
5199
5200
5201/* ------------------------------------------------------------------------ */
5202/* Function:    fr_natclone                                                 */
5203/* Returns:     ipstate_t* - NULL == cloning failed,                        */
5204/*                           else pointer to new NAT structure              */
5205/* Parameters:  fin(I)   - pointer to packet information                    */
5206/*              nat(I)   - pointer to master NAT structure                  */
5207/* Write Lock:  ipf_nat                                                     */
5208/*                                                                          */
5209/* Create a "duplicate" NAT table entry from the master.                    */
5210/* ------------------------------------------------------------------------ */
5211nat_t *fr_natclone(fin, nat)
5212fr_info_t *fin;
5213nat_t *nat;
5214{
5215	frentry_t *fr;
5216	nat_t *clone;
5217	ipnat_t *np;
5218	ipf_stack_t *ifs = fin->fin_ifs;
5219
5220	/*
5221	 * Trigger automatic call to nat_flushtable() if the
5222	 * table has reached capcity specified by hi watermark.
5223	 */
5224	if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
5225		ifs->ifs_nat_doflush = 1;
5226
5227	/*
5228	 * If automatic flushing did not do its job, and the table
5229	 * has filled up, don't try to create a new entry.
5230	 */
5231	if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
5232		ifs->ifs_nat_stats.ns_memfail++;
5233		return NULL;
5234	}
5235
5236	KMALLOC(clone, nat_t *);
5237	if (clone == NULL)
5238		return NULL;
5239	bcopy((char *)nat, (char *)clone, sizeof(*clone));
5240
5241	MUTEX_NUKE(&clone->nat_lock);
5242
5243	clone->nat_aps = NULL;
5244	/*
5245	 * Initialize all these so that nat_delete() doesn't cause a crash.
5246	 */
5247	clone->nat_tqe.tqe_pnext = NULL;
5248	clone->nat_tqe.tqe_next = NULL;
5249	clone->nat_tqe.tqe_ifq = NULL;
5250	clone->nat_tqe.tqe_parent = clone;
5251
5252	clone->nat_flags &= ~SI_CLONE;
5253	clone->nat_flags |= SI_CLONED;
5254
5255	if (clone->nat_hm)
5256		clone->nat_hm->hm_ref++;
5257
5258	if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
5259		KFREE(clone);
5260		return NULL;
5261	}
5262	np = clone->nat_ptr;
5263	if (np != NULL) {
5264		if (ifs->ifs_nat_logging)
5265			nat_log(clone, (u_int)np->in_redir, ifs);
5266		np->in_use++;
5267	}
5268	fr = clone->nat_fr;
5269	if (fr != NULL) {
5270		MUTEX_ENTER(&fr->fr_lock);
5271		fr->fr_ref++;
5272		MUTEX_EXIT(&fr->fr_lock);
5273	}
5274
5275	/*
5276	 * Because the clone is created outside the normal loop of things and
5277	 * TCP has special needs in terms of state, initialise the timeout
5278	 * state of the new NAT from here.
5279	 */
5280	if (clone->nat_p == IPPROTO_TCP) {
5281		(void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
5282				  clone->nat_flags);
5283	}
5284#ifdef	IPFILTER_SYNC
5285	clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
5286#endif
5287	if (ifs->ifs_nat_logging)
5288		nat_log(clone, NL_CLONE, ifs);
5289	return clone;
5290}
5291
5292
5293/* ------------------------------------------------------------------------ */
5294/* Function:   nat_wildok                                                   */
5295/* Returns:    int - 1 == packet's ports match wildcards                    */
5296/*                   0 == packet's ports don't match wildcards              */
5297/* Parameters: nat(I)   - NAT entry                                         */
5298/*             sport(I) - source port                                       */
5299/*             dport(I) - destination port                                  */
5300/*             flags(I) - wildcard flags                                    */
5301/*             dir(I)   - packet direction                                  */
5302/*                                                                          */
5303/* Use NAT entry and packet direction to determine which combination of     */
5304/* wildcard flags should be used.                                           */
5305/* ------------------------------------------------------------------------ */
5306int nat_wildok(nat, sport, dport, flags, dir)
5307nat_t *nat;
5308int sport;
5309int dport;
5310int flags;
5311int dir;
5312{
5313	/*
5314	 * When called by       dir is set to
5315	 * nat_inlookup         NAT_INBOUND (0)
5316	 * nat_outlookup        NAT_OUTBOUND (1)
5317	 *
5318	 * We simply combine the packet's direction in dir with the original
5319	 * "intended" direction of that NAT entry in nat->nat_dir to decide
5320	 * which combination of wildcard flags to allow.
5321	 */
5322
5323	switch ((dir << 1) | nat->nat_dir)
5324	{
5325	case 3: /* outbound packet / outbound entry */
5326		if (((nat->nat_inport == sport) ||
5327		    (flags & SI_W_SPORT)) &&
5328		    ((nat->nat_oport == dport) ||
5329		    (flags & SI_W_DPORT)))
5330			return 1;
5331		break;
5332	case 2: /* outbound packet / inbound entry */
5333		if (((nat->nat_outport == sport) ||
5334		    (flags & SI_W_DPORT)) &&
5335		    ((nat->nat_oport == dport) ||
5336		    (flags & SI_W_SPORT)))
5337			return 1;
5338		break;
5339	case 1: /* inbound packet / outbound entry */
5340		if (((nat->nat_oport == sport) ||
5341		    (flags & SI_W_DPORT)) &&
5342		    ((nat->nat_outport == dport) ||
5343		    (flags & SI_W_SPORT)))
5344			return 1;
5345		break;
5346	case 0: /* inbound packet / inbound entry */
5347		if (((nat->nat_oport == sport) ||
5348		    (flags & SI_W_SPORT)) &&
5349		    ((nat->nat_outport == dport) ||
5350		    (flags & SI_W_DPORT)))
5351			return 1;
5352		break;
5353	default:
5354		break;
5355	}
5356
5357	return(0);
5358}
5359
5360
5361/* ------------------------------------------------------------------------ */
5362/* Function:    nat_mssclamp                                                */
5363/* Returns:     Nil                                                         */
5364/* Parameters:  tcp(I)    - pointer to TCP header                           */
5365/*              maxmss(I) - value to clamp the TCP MSS to                   */
5366/*              csump(I)  - pointer to TCP checksum                         */
5367/*                                                                          */
5368/* Check for MSS option and clamp it if necessary.  If found and changed,   */
5369/* then the TCP header checksum will be updated to reflect the change in    */
5370/* the MSS.                                                                 */
5371/* ------------------------------------------------------------------------ */
5372static void nat_mssclamp(tcp, maxmss, csump)
5373tcphdr_t *tcp;
5374u_32_t maxmss;
5375u_short *csump;
5376{
5377	u_char *cp, *ep, opt;
5378	int hlen, advance;
5379	u_32_t mss, sumd;
5380
5381	hlen = TCP_OFF(tcp) << 2;
5382	if (hlen > sizeof(*tcp)) {
5383		cp = (u_char *)tcp + sizeof(*tcp);
5384		ep = (u_char *)tcp + hlen;
5385
5386		while (cp < ep) {
5387			opt = cp[0];
5388			if (opt == TCPOPT_EOL)
5389				break;
5390			else if (opt == TCPOPT_NOP) {
5391				cp++;
5392				continue;
5393			}
5394
5395			if (cp + 1 >= ep)
5396				break;
5397			advance = cp[1];
5398			if ((cp + advance > ep) || (advance <= 0))
5399				break;
5400			switch (opt)
5401			{
5402			case TCPOPT_MAXSEG:
5403				if (advance != 4)
5404					break;
5405				mss = cp[2] * 256 + cp[3];
5406				if (mss > maxmss) {
5407					cp[2] = maxmss / 256;
5408					cp[3] = maxmss & 0xff;
5409					CALC_SUMD(mss, maxmss, sumd);
5410					fix_outcksum(csump, sumd);
5411				}
5412				break;
5413			default:
5414				/* ignore unknown options */
5415				break;
5416			}
5417
5418			cp += advance;
5419		}
5420	}
5421}
5422
5423
5424/* ------------------------------------------------------------------------ */
5425/* Function:    fr_setnatqueue                                              */
5426/* Returns:     Nil                                                         */
5427/* Parameters:  nat(I)- pointer to NAT structure                            */
5428/*              rev(I) - forward(0) or reverse(1) direction                 */
5429/* Locks:       ipf_nat (read or write)                                     */
5430/*                                                                          */
5431/* Put the NAT entry on its default queue entry, using rev as a helped in   */
5432/* determining which queue it should be placed on.                          */
5433/* ------------------------------------------------------------------------ */
5434void fr_setnatqueue(nat, rev, ifs)
5435nat_t *nat;
5436int rev;
5437ipf_stack_t *ifs;
5438{
5439	ipftq_t *oifq, *nifq;
5440
5441	if (nat->nat_ptr != NULL)
5442		nifq = nat->nat_ptr->in_tqehead[rev];
5443	else
5444		nifq = NULL;
5445
5446	if (nifq == NULL) {
5447		switch (nat->nat_p)
5448		{
5449		case IPPROTO_UDP :
5450			nifq = &ifs->ifs_nat_udptq;
5451			break;
5452		case IPPROTO_ICMP :
5453			nifq = &ifs->ifs_nat_icmptq;
5454			break;
5455		case IPPROTO_TCP :
5456			nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5457			break;
5458		default :
5459			nifq = &ifs->ifs_nat_iptq;
5460			break;
5461		}
5462	}
5463
5464	oifq = nat->nat_tqe.tqe_ifq;
5465	/*
5466	 * If it's currently on a timeout queue, move it from one queue to
5467	 * another, else put it on the end of the newly determined queue.
5468	 */
5469	if (oifq != NULL)
5470		fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5471	else
5472		fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5473	return;
5474}
5475
5476/* ------------------------------------------------------------------------ */
5477/* Function:    nat_getnext                                                 */
5478/* Returns:     int - 0 == ok, else error                                   */
5479/* Parameters:  t(I)   - pointer to ipftoken structure                      */
5480/*              itp(I) - pointer to ipfgeniter_t structure                  */
5481/*              ifs - ipf stack instance                                    */
5482/*                                                                          */
5483/* Fetch the next nat/ipnat/hostmap structure pointer from the linked list  */
5484/* and copy it out to the storage space pointed to by itp.  The next item   */
5485/* in the list to look at is put back in the ipftoken struture.             */
5486/* ------------------------------------------------------------------------ */
5487static int nat_getnext(t, itp, ifs)
5488ipftoken_t *t;
5489ipfgeniter_t *itp;
5490ipf_stack_t *ifs;
5491{
5492	hostmap_t *hm, *nexthm = NULL, zerohm;
5493	ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5494	nat_t *nat, *nextnat = NULL, zeronat;
5495	int error = 0, count;
5496	char *dst;
5497
5498	if (itp->igi_nitems == 0)
5499		return EINVAL;
5500
5501	READ_ENTER(&ifs->ifs_ipf_nat);
5502
5503	/*
5504	 * Get "previous" entry from the token and find the next entry.
5505	 */
5506	switch (itp->igi_type)
5507	{
5508	case IPFGENITER_HOSTMAP :
5509		hm = t->ipt_data;
5510		if (hm == NULL) {
5511			nexthm = ifs->ifs_ipf_hm_maplist;
5512		} else {
5513			nexthm = hm->hm_next;
5514		}
5515		break;
5516
5517	case IPFGENITER_IPNAT :
5518		ipn = t->ipt_data;
5519		if (ipn == NULL) {
5520			nextipnat = ifs->ifs_nat_list;
5521		} else {
5522			nextipnat = ipn->in_next;
5523		}
5524		break;
5525
5526	case IPFGENITER_NAT :
5527		nat = t->ipt_data;
5528		if (nat == NULL) {
5529			nextnat = ifs->ifs_nat_instances;
5530		} else {
5531			nextnat = nat->nat_next;
5532		}
5533		break;
5534	default :
5535		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5536		return EINVAL;
5537	}
5538
5539	/*
5540	 * Note, this loop is based on the number of items that a user
5541	 * requested. The user can request any number, potentially far more than
5542	 * the number of items that actually exist. If a user does that, we'll
5543	 * break out of this by setting the value of count to 1 which terminates
5544	 * the loop.  This should be fine from an ioctl perspective, because the
5545	 * last entry that we insert will be the zero entry which terminates the
5546	 * chain.
5547	 */
5548	dst = itp->igi_data;
5549	for (count = itp->igi_nitems; count > 0; count--) {
5550		/*
5551		 * If we found an entry, add a reference to it and update the token.
5552		 * Otherwise, zero out data to be returned and NULL out token.
5553		 */
5554		switch (itp->igi_type)
5555		{
5556		case IPFGENITER_HOSTMAP :
5557			if (nexthm != NULL) {
5558				ATOMIC_INC32(nexthm->hm_ref);
5559				t->ipt_data = nexthm;
5560			} else {
5561				bzero(&zerohm, sizeof(zerohm));
5562				nexthm = &zerohm;
5563				t->ipt_data = NULL;
5564			}
5565			break;
5566		case IPFGENITER_IPNAT :
5567			if (nextipnat != NULL) {
5568				ATOMIC_INC32(nextipnat->in_use);
5569				t->ipt_data = nextipnat;
5570			} else {
5571				bzero(&zeroipn, sizeof(zeroipn));
5572				nextipnat = &zeroipn;
5573				t->ipt_data = NULL;
5574			}
5575			break;
5576		case IPFGENITER_NAT :
5577			if (nextnat != NULL) {
5578				MUTEX_ENTER(&nextnat->nat_lock);
5579				nextnat->nat_ref++;
5580				MUTEX_EXIT(&nextnat->nat_lock);
5581				t->ipt_data = nextnat;
5582			} else {
5583				bzero(&zeronat, sizeof(zeronat));
5584				nextnat = &zeronat;
5585				t->ipt_data = NULL;
5586			}
5587			break;
5588		default :
5589			break;
5590		}
5591
5592		/*
5593		 * Now that we have ref, it's save to give up lock.
5594		 */
5595		RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5596
5597		/*
5598		 * Copy out data and clean up references and token as needed.
5599		 */
5600		switch (itp->igi_type)
5601		{
5602		case IPFGENITER_HOSTMAP :
5603			error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5604			if (error != 0)
5605				error = EFAULT;
5606			if (t->ipt_data == NULL) {
5607				ipf_freetoken(t, ifs);
5608				count = 1;
5609				break;
5610			} else {
5611				if (hm != NULL) {
5612					WRITE_ENTER(&ifs->ifs_ipf_nat);
5613					fr_hostmapdel(&hm);
5614					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5615				}
5616				if (nexthm->hm_next == NULL) {
5617					ipf_freetoken(t, ifs);
5618					count = 1;
5619					break;
5620				}
5621				dst += sizeof(*nexthm);
5622				hm = nexthm;
5623				nexthm = nexthm->hm_next;
5624			}
5625			break;
5626
5627		case IPFGENITER_IPNAT :
5628			error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5629			if (error != 0)
5630				error = EFAULT;
5631			if (t->ipt_data == NULL) {
5632				ipf_freetoken(t, ifs);
5633				count = 1;
5634				break;
5635			} else {
5636				if (ipn != NULL) {
5637					WRITE_ENTER(&ifs->ifs_ipf_nat);
5638					fr_ipnatderef(&ipn, ifs);
5639					RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5640				}
5641				if (nextipnat->in_next == NULL) {
5642					ipf_freetoken(t, ifs);
5643					count = 1;
5644					break;
5645				}
5646				dst += sizeof(*nextipnat);
5647				ipn = nextipnat;
5648				nextipnat = nextipnat->in_next;
5649			}
5650			break;
5651
5652		case IPFGENITER_NAT :
5653			error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5654			if (error != 0)
5655				error = EFAULT;
5656			if (t->ipt_data == NULL) {
5657				ipf_freetoken(t, ifs);
5658				count = 1;
5659				break;
5660			} else {
5661				if (nat != NULL)
5662					fr_natderef(&nat, ifs);
5663				if (nextnat->nat_next == NULL) {
5664					ipf_freetoken(t, ifs);
5665					count = 1;
5666					break;
5667				}
5668				dst += sizeof(*nextnat);
5669				nat = nextnat;
5670				nextnat = nextnat->nat_next;
5671			}
5672			break;
5673		default :
5674			break;
5675		}
5676
5677		if ((count == 1) || (error != 0))
5678			break;
5679
5680		READ_ENTER(&ifs->ifs_ipf_nat);
5681	}
5682
5683	return error;
5684}
5685
5686
5687/* ------------------------------------------------------------------------ */
5688/* Function:    nat_iterator                                                */
5689/* Returns:     int - 0 == ok, else error                                   */
5690/* Parameters:  token(I) - pointer to ipftoken structure                    */
5691/*              itp(I) - pointer to ipfgeniter_t structure                  */
5692/*                                                                          */
5693/* This function acts as a handler for the SIOCGENITER ioctls that use a    */
5694/* generic structure to iterate through a list.  There are three different  */
5695/* linked lists of NAT related information to go through: NAT rules, active */
5696/* NAT mappings and the NAT fragment cache.                                 */
5697/* ------------------------------------------------------------------------ */
5698static int nat_iterator(token, itp, ifs)
5699ipftoken_t *token;
5700ipfgeniter_t *itp;
5701ipf_stack_t *ifs;
5702{
5703	int error;
5704
5705	if (itp->igi_data == NULL)
5706		return EFAULT;
5707
5708	token->ipt_subtype = itp->igi_type;
5709
5710	switch (itp->igi_type)
5711	{
5712	case IPFGENITER_HOSTMAP :
5713	case IPFGENITER_IPNAT :
5714	case IPFGENITER_NAT :
5715		error = nat_getnext(token, itp, ifs);
5716		break;
5717	case IPFGENITER_NATFRAG :
5718		error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5719				    &ifs->ifs_ipfr_nattail,
5720				    &ifs->ifs_ipf_natfrag, ifs);
5721		break;
5722	default :
5723		error = EINVAL;
5724		break;
5725	}
5726
5727	return error;
5728}
5729
5730
5731/* ---------------------------------------------------------------------- */
5732/* Function:    nat_flushtable						  */
5733/* Returns:     int - 0 == success, -1 == failure			  */
5734/* Parameters:  flush_option - how to flush the active NAT table	  */
5735/*              ifs - ipf stack instance				  */
5736/* Write Locks: ipf_nat							  */
5737/*									  */
5738/* Flush NAT tables.  Three actions currently defined:                    */
5739/*									  */
5740/* FLUSH_TABLE_ALL	: Flush all NAT table entries			  */
5741/*									  */
5742/* FLUSH_TABLE_CLOSING	: Flush entries with TCP connections which	  */
5743/*			  have started to close on both ends using	  */
5744/*			  ipf_flushclosing().				  */
5745/*									  */
5746/* FLUSH_TABLE_EXTRA	: First, flush entries which are "almost" closed. */
5747/*			  Then, if needed, flush entries with TCP	  */
5748/*			  connections which have been idle for a long	  */
5749/*			  time with ipf_extraflush().			  */
5750/* ---------------------------------------------------------------------- */
5751static int nat_flushtable(flush_option, ifs)
5752int flush_option;
5753ipf_stack_t *ifs;
5754{
5755        nat_t *nat, *natn;
5756        int removed;
5757        SPL_INT(s);
5758
5759        removed = 0;
5760
5761        SPL_NET(s);
5762        switch (flush_option)
5763        {
5764        case FLUSH_TABLE_ALL:
5765		natn = ifs->ifs_nat_instances;
5766		while ((nat = natn) != NULL) {
5767			natn = nat->nat_next;
5768			if (nat_delete(nat, NL_FLUSH, ifs) == 0)
5769				removed++;
5770		}
5771                break;
5772
5773        case FLUSH_TABLE_CLOSING:
5774                removed = ipf_flushclosing(NAT_FLUSH,
5775					   IPF_TCPS_CLOSE_WAIT,
5776					   ifs->ifs_nat_tqb,
5777					   ifs->ifs_nat_utqe,
5778					   ifs);
5779                break;
5780
5781        case FLUSH_TABLE_EXTRA:
5782                removed = ipf_flushclosing(NAT_FLUSH,
5783					   IPF_TCPS_FIN_WAIT_2,
5784					   ifs->ifs_nat_tqb,
5785					   ifs->ifs_nat_utqe,
5786					   ifs);
5787
5788                /*
5789                 * Be sure we haven't done this in the last 10 seconds.
5790                 */
5791                if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5792                    IPF_TTLVAL(10))
5793                        break;
5794                ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5795                removed += ipf_extraflush(NAT_FLUSH,
5796					  &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5797					  ifs->ifs_nat_utqe,
5798					  ifs);
5799                break;
5800
5801        default: /* Flush Nothing */
5802                break;
5803        }
5804
5805        SPL_X(s);
5806        return (removed);
5807}
5808
5809
5810/* ------------------------------------------------------------------------ */
5811/* Function:    nat_uncreate                                                */
5812/* Returns:     Nil                                                         */
5813/* Parameters:  fin(I) - pointer to packet information                      */
5814/*                                                                          */
5815/* This function is used to remove a NAT entry from the NAT table when we   */
5816/* decide that the create was actually in error. It is thus assumed that    */
5817/* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */
5818/* with the translated packet (not the original), we have to reverse the    */
5819/* lookup. Although doing the lookup is expensive (relatively speaking), it */
5820/* is not anticipated that this will be a frequent occurance for normal     */
5821/* traffic patterns.                                                        */
5822/* ------------------------------------------------------------------------ */
5823void nat_uncreate(fin)
5824fr_info_t *fin;
5825{
5826	ipf_stack_t *ifs = fin->fin_ifs;
5827	int nflags;
5828	nat_t *nat;
5829
5830	switch (fin->fin_p)
5831	{
5832	case IPPROTO_TCP :
5833		nflags = IPN_TCP;
5834		break;
5835	case IPPROTO_UDP :
5836		nflags = IPN_UDP;
5837		break;
5838	default :
5839		nflags = 0;
5840		break;
5841	}
5842
5843	WRITE_ENTER(&ifs->ifs_ipf_nat);
5844
5845	if (fin->fin_out == 0) {
5846		nat = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
5847				    fin->fin_dst, fin->fin_src);
5848	} else {
5849		nat = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
5850				   fin->fin_src, fin->fin_dst);
5851	}
5852
5853	if (nat != NULL) {
5854		ifs->ifs_nat_stats.ns_uncreate[fin->fin_out][0]++;
5855		(void) nat_delete(nat, NL_DESTROY, ifs);
5856	} else {
5857		ifs->ifs_nat_stats.ns_uncreate[fin->fin_out][1]++;
5858	}
5859
5860	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5861}
5862