/*
 * Copyright (C) 1993-2001, 2003 by Darren Reed.
 *
 * See the IPFILTER.LICENCE file for details on licencing.
 *
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright 2018 Joyent, Inc.
 */

#if !defined(lint)
static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
#endif

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/cpuvar.h>
#include <sys/open.h>
#include <sys/ioctl.h>
#include <sys/filio.h>
#include <sys/systm.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/cred.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/mac_provider.h>
#include <sys/mkdev.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/dditypes.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>
#include <net/if.h>
#include <net/af.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/tcpip.h>
#include <netinet/ip_icmp.h>
#include "netinet/ip_compat.h"
#ifdef	USE_INET6
# include <netinet/icmp6.h>
#endif
#include "netinet/ip_fil.h"
#include "netinet/ip_nat.h"
#include "netinet/ip_frag.h"
#include "netinet/ip_state.h"
#include "netinet/ip_auth.h"
#include "netinet/ip_proxy.h"
#include "netinet/ipf_stack.h"
#ifdef	IPFILTER_LOOKUP
# include "netinet/ip_lookup.h"
#endif
#include <inet/ip_ire.h>

#include <sys/md5.h>
#include <sys/neti.h>

static	int	frzerostats __P((caddr_t, ipf_stack_t *));
static	int	fr_setipfloopback __P((int, ipf_stack_t *));
static	int	fr_enableipf __P((ipf_stack_t *, int));
static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook __P((hook_data_t, int, int, void *));
static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
    void *));
static	int     ipf_hook6 __P((hook_data_t, int, int, void *));

static	int	ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
    void *));

extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));

static int	ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
    const char *, const char *, const char *));
static int	ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
    const char *, const char *, const char *));

#if SOLARIS2 < 10
#if SOLARIS2 >= 7
u_int		*ip_ttl_ptr = NULL;
u_int		*ip_mtudisc = NULL;
# if SOLARIS2 >= 8
int		*ip_forwarding = NULL;
u_int		*ip6_forwarding = NULL;
# else
u_int		*ip_forwarding = NULL;
# endif
#else
u_long		*ip_ttl_ptr = NULL;
u_long		*ip_mtudisc = NULL;
u_long		*ip_forwarding = NULL;
#endif
#endif

vmem_t	*ipf_minor;	/* minor number arena */
void 	*ipf_state;	/* DDI state */

/*
 * GZ-controlled and per-zone stacks:
 *
 * For each non-global zone, we create two ipf stacks: the per-zone stack and
 * the GZ-controlled stack.  The per-zone stack can be controlled and observed
 * from inside the zone or from the global zone.  The GZ-controlled stack can
 * only be controlled and observed from the global zone (though the rules
 * still only affect that non-global zone).
 *
 * The two hooks are always arranged so that the GZ-controlled stack is always
 * "outermost" with respect to the zone.  The traffic flow then looks like
 * this:
 *
 * Inbound:
 *
 *     nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
 *
 * Outbound:
 *
 *     nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
 */

/* IPv4 hook names */
char *hook4_nicevents = 	"ipfilter_hook4_nicevents";
char *hook4_nicevents_gz = 	"ipfilter_hook4_nicevents_gz";
char *hook4_in = 		"ipfilter_hook4_in";
char *hook4_in_gz = 		"ipfilter_hook4_in_gz";
char *hook4_out = 		"ipfilter_hook4_out";
char *hook4_out_gz = 		"ipfilter_hook4_out_gz";
char *hook4_loop_in = 		"ipfilter_hook4_loop_in";
char *hook4_loop_in_gz = 	"ipfilter_hook4_loop_in_gz";
char *hook4_loop_out = 		"ipfilter_hook4_loop_out";
char *hook4_loop_out_gz = 	"ipfilter_hook4_loop_out_gz";

/* IPv6 hook names */
char *hook6_nicevents = 	"ipfilter_hook6_nicevents";
char *hook6_nicevents_gz = 	"ipfilter_hook6_nicevents_gz";
char *hook6_in = 		"ipfilter_hook6_in";
char *hook6_in_gz = 		"ipfilter_hook6_in_gz";
char *hook6_out = 		"ipfilter_hook6_out";
char *hook6_out_gz = 		"ipfilter_hook6_out_gz";
char *hook6_loop_in = 		"ipfilter_hook6_loop_in";
char *hook6_loop_in_gz = 	"ipfilter_hook6_loop_in_gz";
char *hook6_loop_out = 		"ipfilter_hook6_loop_out";
char *hook6_loop_out_gz = 	"ipfilter_hook6_loop_out_gz";

/* viona hook names */
char *hook_viona_in =		"ipfilter_hookviona_in";
char *hook_viona_in_gz =	"ipfilter_hookviona_in_gz";
char *hook_viona_out =		"ipfilter_hookviona_out";
char *hook_viona_out_gz =	"ipfilter_hookviona_out_gz";

/* ------------------------------------------------------------------------ */
/* Function:    ipldetach                                                   */
/* Returns:     int - 0 == success, else error.                             */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* This function is responsible for undoing anything that might have been   */
/* done in a call to iplattach().  It must be able to clean up from a call  */
/* to iplattach() that did not succeed.  Why might that happen?  Someone    */
/* configures a table to be so large that we cannot allocate enough memory  */
/* for it.                                                                  */
/* ------------------------------------------------------------------------ */
int ipldetach(ifs)
ipf_stack_t *ifs;
{

	ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));

#if SOLARIS2 < 10

	if (ifs->ifs_fr_control_forwarding & 2) {
		if (ip_forwarding != NULL)
			*ip_forwarding = 0;
#if SOLARIS2 >= 8
		if (ip6_forwarding != NULL)
			*ip6_forwarding = 0;
#endif
	}
#endif

	/*
	 * This lock needs to be dropped around the net_hook_unregister calls
	 * because we can deadlock here with:
	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
	 */
	RWLOCK_EXIT(&ifs->ifs_ipf_global);

#define	UNDO_HOOK(_f, _b, _e, _h)					\
	do {								\
		if (ifs->_f != NULL) {					\
			if (ifs->_b) {					\
				int tmp = net_hook_unregister(ifs->_f,	\
					   _e, ifs->_h);		\
				ifs->_b = (tmp != 0 && tmp != ENXIO);	\
				if (!ifs->_b && ifs->_h != NULL) {	\
					hook_free(ifs->_h);		\
					ifs->_h = NULL;			\
				}					\
			} else if (ifs->_h != NULL) {			\
				hook_free(ifs->_h);			\
				ifs->_h = NULL;				\
			}						\
		}							\
		_NOTE(CONSTCOND)					\
	} while (0)

	/*
	 * Remove IPv6 Hooks
	 */
	if (ifs->ifs_ipf_ipv6 != NULL) {
		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);

		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
			goto detach_failed;
		ifs->ifs_ipf_ipv6 = NULL;
        }

	/*
	 * Remove IPv4 Hooks
	 */
	if (ifs->ifs_ipf_ipv4 != NULL) {
		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);

		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
			goto detach_failed;
		ifs->ifs_ipf_ipv4 = NULL;
	}

	/*
	 * Remove notification of viona hooks
	 */
	net_instance_notify_unregister(ifs->ifs_netid,
	    ipf_hook_instance_notify);

#undef UNDO_HOOK

	/*
	 * Normally, viona will unregister itself before ipldetach() is called,
	 * so these will be no-ops, but out of caution, we try to make sure
	 * we've removed any of our references.
	 */
	(void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
	    NH_PHYSICAL_IN);
	(void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
	    NH_PHYSICAL_OUT);

	{
		char netidstr[12]; /* Large enough for INT_MAX + NUL */
		(void) snprintf(netidstr, sizeof (netidstr), "%d",
		    ifs->ifs_netid);

		/*
		 * The notify callbacks expect the netid value passed as a
		 * string in the third argument.  To prevent confusion if
		 * traced, we pass the same value the nethook framework would
		 * pass, even though the callback does not currently use the
		 * value.
		 */
		(void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
		    NULL, Hn_VIONA);
	}

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "ipldetach()\n");
#endif

	WRITE_ENTER(&ifs->ifs_ipf_global);
	fr_deinitialise(ifs);

	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);

	if (ifs->ifs_ipf_locks_done == 1) {
		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
		RW_DESTROY(&ifs->ifs_ipf_tokens);
		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
		ifs->ifs_ipf_locks_done = 0;
	}

	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
		return -1;

	return 0;

detach_failed:
	WRITE_ENTER(&ifs->ifs_ipf_global);
	return -1;
}

int iplattach(ifs)
ipf_stack_t *ifs;
{
#if SOLARIS2 < 10
	int i;
#endif
	netid_t id = ifs->ifs_netid;

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplattach()\n");
#endif

	ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
	ifs->ifs_fr_flags = IPF_LOGGING;
#ifdef _KERNEL
	ifs->ifs_fr_update_ipid = 0;
#else
	ifs->ifs_fr_update_ipid = 1;
#endif
	ifs->ifs_fr_minttl = 4;
	ifs->ifs_fr_icmpminfragmtu = 68;
#if defined(IPFILTER_DEFAULT_BLOCK)
	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
#else
	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
#endif

	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
	ifs->ifs_ipf_locks_done = 1;

	if (fr_initialise(ifs) < 0)
		return -1;

	/*
	 * For incoming packets, we want the GZ-controlled hooks to run before
	 * the per-zone hooks, regardless of what order they're are installed.
	 * See the "GZ-controlled and per-zone stacks" comment block at the top
	 * of this file.
	 */
#define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a)				\
	HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);	\
	(x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER;	\
	(x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);

	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
		  hook4_nicevents, hook4_nicevents_gz, ifs);
	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
		  hook4_in, hook4_in_gz, ifs);
	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
		  hook4_loop_in, hook4_loop_in_gz, ifs);

	/*
	 * For outgoing packets, we want the GZ-controlled hooks to run after
	 * the per-zone hooks, regardless of what order they're are installed.
	 * See the "GZ-controlled and per-zone stacks" comment block at the top
	 * of this file.
	 */
#define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a)				\
	HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs);	\
	(x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE;	\
	(x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);

	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
		  hook4_out, hook4_out_gz, ifs);
	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
		  hook4_loop_out, hook4_loop_out_gz, ifs);

	/*
	 * If we hold this lock over all of the net_hook_register calls, we
	 * can cause a deadlock to occur with the following lock ordering:
	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
	 */
	RWLOCK_EXIT(&ifs->ifs_ipf_global);

	/*
	 * Add IPv4 hooks
	 */
	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
	if (ifs->ifs_ipf_ipv4 == NULL)
		goto hookup_failed;

	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
	if (!ifs->ifs_hook4_nic_events)
		goto hookup_failed;

	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
	if (!ifs->ifs_hook4_physical_in)
		goto hookup_failed;

	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
	if (!ifs->ifs_hook4_physical_out)
		goto hookup_failed;

	if (ifs->ifs_ipf_loopback) {
		ifs->ifs_hook4_loopback_in = (net_hook_register(
		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
		    ifs->ifs_ipfhook4_loop_in) == 0);
		if (!ifs->ifs_hook4_loopback_in)
			goto hookup_failed;

		ifs->ifs_hook4_loopback_out = (net_hook_register(
		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
		    ifs->ifs_ipfhook4_loop_out) == 0);
		if (!ifs->ifs_hook4_loopback_out)
			goto hookup_failed;
	}

	/*
	 * Add IPv6 hooks
	 */
	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
	if (ifs->ifs_ipf_ipv6 == NULL)
		goto hookup_failed;

	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
		  hook6_nicevents, hook6_nicevents_gz, ifs);
	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
		  hook6_in, hook6_in_gz, ifs);
	HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
		  hook6_loop_in, hook6_loop_in_gz, ifs);
	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
		  hook6_out, hook6_out_gz, ifs);
	HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
		  hook6_loop_out, hook6_loop_out_gz, ifs);

	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
	if (!ifs->ifs_hook6_nic_events)
		goto hookup_failed;

	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
	if (!ifs->ifs_hook6_physical_in)
		goto hookup_failed;

	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
	if (!ifs->ifs_hook6_physical_out)
		goto hookup_failed;

	if (ifs->ifs_ipf_loopback) {
		ifs->ifs_hook6_loopback_in = (net_hook_register(
		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
		    ifs->ifs_ipfhook6_loop_in) == 0);
		if (!ifs->ifs_hook6_loopback_in)
			goto hookup_failed;

		ifs->ifs_hook6_loopback_out = (net_hook_register(
		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
		    ifs->ifs_ipfhook6_loop_out) == 0);
		if (!ifs->ifs_hook6_loopback_out)
			goto hookup_failed;
	}

	/*
	 * VIONA INET hooks.  While the nethook framework allows us to register
	 * hooks for events that haven't been registered yet, we instead
	 * register and unregister our hooks in response to notifications
	 * about the viona hooks from the nethook framework.  This prevents
	 * problems when the viona module gets unloaded while the ipf module
	 * does not.  If we do not unregister our hooks after the viona module
	 * is unloaded, the viona module cannot later re-register them if it
	 * gets reloaded.  As the ip, vnd, and ipf modules are rarely unloaded
	 * even on DEBUG kernels, they do not experience this issue.
	 */
	if (net_instance_notify_register(id, ipf_hook_instance_notify,
	    ifs) != 0)
		goto hookup_failed;

	/*
	 * Reacquire ipf_global, now it is safe.
	 */
	WRITE_ENTER(&ifs->ifs_ipf_global);

/* Do not use private interface ip_params_arr[] in Solaris 10 */
#if SOLARIS2 < 10

#if SOLARIS2 >= 8
	ip_forwarding = &ip_g_forward;
#endif
	/*
	 * XXX - There is no terminator for this array, so it is not possible
	 * to tell if what we are looking for is missing and go off the end
	 * of the array.
	 */

#if SOLARIS2 <= 8
	for (i = 0; ; i++) {
		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
		} else if (!strcmp(ip_param_arr[i].ip_param_name,
			    "ip_path_mtu_discovery")) {
			ip_mtudisc = &ip_param_arr[i].ip_param_value;
		}
#if SOLARIS2 < 8
		else if (!strcmp(ip_param_arr[i].ip_param_name,
			    "ip_forwarding")) {
			ip_forwarding = &ip_param_arr[i].ip_param_value;
		}
#else
		else if (!strcmp(ip_param_arr[i].ip_param_name,
			    "ip6_forwarding")) {
			ip6_forwarding = &ip_param_arr[i].ip_param_value;
		}
#endif

		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
#if SOLARIS2 >= 8
		    ip6_forwarding != NULL &&
#endif
		    ip_forwarding != NULL)
			break;
	}
#endif

	if (ifs->ifs_fr_control_forwarding & 1) {
		if (ip_forwarding != NULL)
			*ip_forwarding = 1;
#if SOLARIS2 >= 8
		if (ip6_forwarding != NULL)
			*ip6_forwarding = 1;
#endif
	}

#endif

	return 0;
hookup_failed:
	WRITE_ENTER(&ifs->ifs_ipf_global);
	return -1;
}

/* ------------------------------------------------------------------------ */
/*
 * Called whenever a nethook protocol is registered or unregistered.  Currently
 * only used to add or remove the hooks for viona.
 *
 * While the function signature requires returning int, nothing
 * in usr/src/uts/common/io/hook.c that invokes the callbacks
 * captures the return value (nor is there currently any documentation
 * on what return values should be).  For now at least, we'll return 0
 * on success (or 'not applicable') or an error value.  Even if the
 * nethook framework doesn't use the return address, it can be observed via
 * dtrace if needed.
 */
static int
ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
    const char *name, const char *dummy __unused, const char *he_name)
{
	ipf_stack_t *ifs = arg;
	hook_t **hookpp;
	char *hook_name, *hint_name;
	hook_func_t hookfn;
	boolean_t *hookedp;
	hook_hint_t hint;
	boolean_t out;
	int ret = 0;

	const boolean_t gz = ifs->ifs_gz_controlled;

	/* We currently only care about viona hooks notifications */
	if (strcmp(name, Hn_VIONA) != 0)
		return (0);

	if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
		out = B_FALSE;
	} else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
		out = B_TRUE;
	} else {
		/*
		 * If we've added more hook events to viona, we must add
		 * the corresponding handling here (even if it's just to
		 * ignore it) to prevent the firewall from not working as
		 * intended.
		 */
		cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
		    he_name);

		return (0);
	}

	if (out) {
		hookpp = &ifs->ifs_ipfhookviona_out;
		hookfn = ipf_hookviona_out;
		hookedp = &ifs->ifs_hookviona_physical_out;
		name = gz ? hook_viona_out_gz : hook_viona_out;
		hint = gz ? HH_AFTER : HH_BEFORE;
		hint_name = gz ? hook_viona_out : hook_viona_out_gz;
	} else {
		hookpp = &ifs->ifs_ipfhookviona_in;
		hookfn = ipf_hookviona_in;
		hookedp = &ifs->ifs_hookviona_physical_in;
		name = gz ? hook_viona_in_gz : hook_viona_in;
		hint = gz ? HH_BEFORE : HH_AFTER;
		hint_name = gz ? hook_viona_in : hook_viona_in_gz;
	}

	switch (command) {
	default:
	case HN_NONE:
		break;
	case HN_REGISTER:
		HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
		(*hookpp)->h_hint = hint;
		(*hookpp)->h_hintvalue = (uintptr_t)hint_name;
		ret = net_hook_register(ifs->ifs_ipf_viona,
		    (char *)he_name, *hookpp);
		if (ret != 0) {
			cmn_err(CE_NOTE, "%s: could not register hook "
			    "(hook family=%s hook=%s) err=%d", __func__,
			    name, he_name, ret);
			*hookedp = B_FALSE;
			return (ret);
		}
		*hookedp = B_TRUE;
		break;
	case HN_UNREGISTER:
		if (ifs->ifs_ipf_viona == NULL)
			break;

		ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
		    (char *)he_name, *hookpp) : 0;
		if ((ret == 0 || ret == ENXIO)) {
			if (*hookpp != NULL) {
				hook_free(*hookpp);
				*hookpp = NULL;
			}
			*hookedp = B_FALSE;
		}
		break;
	}

	return (ret);
}

/*
 * Called whenever a new nethook instance is created.  Currently only used
 * with the Hn_VIONA nethooks.  Similar to ipf_hook_protocol_notify, the out
 * function signature must return an int, though the result is never used.
 * We elect to return 0 on success (or not applicable) or a non-zero value
 * on error.
 */
static int
ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
    const char *netid, const char *dummy __unused, const char *instance)
{
	ipf_stack_t *ifs = arg;
	int ret = 0;

	/* We currently only care about viona hooks */
	if (strcmp(instance, Hn_VIONA) != 0)
		return (0);

	switch (command) {
	case HN_NONE:
	default:
		return (0);
	case HN_REGISTER:
		ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
		    NHF_VIONA);

		if (ifs->ifs_ipf_viona == NULL)
			return (EPROTONOSUPPORT);

		ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
		    ipf_hook_protocol_notify, ifs);
		VERIFY(ret == 0 || ret == ESHUTDOWN);
		break;
	case HN_UNREGISTER:
		if (ifs->ifs_ipf_viona == NULL)
			break;
		VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
		    ipf_hook_protocol_notify));
		VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
		ifs->ifs_ipf_viona = NULL;
		break;
	}

	return (ret);
}

static	int	fr_setipfloopback(set, ifs)
int set;
ipf_stack_t *ifs;
{
	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
		return EFAULT;

	if (set && !ifs->ifs_ipf_loopback) {
		ifs->ifs_ipf_loopback = 1;

		ifs->ifs_hook4_loopback_in = (net_hook_register(
		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
		    ifs->ifs_ipfhook4_loop_in) == 0);
		if (!ifs->ifs_hook4_loopback_in)
			return EINVAL;

		ifs->ifs_hook4_loopback_out = (net_hook_register(
		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
		    ifs->ifs_ipfhook4_loop_out) == 0);
		if (!ifs->ifs_hook4_loopback_out)
			return EINVAL;

		ifs->ifs_hook6_loopback_in = (net_hook_register(
		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
		    ifs->ifs_ipfhook6_loop_in) == 0);
		if (!ifs->ifs_hook6_loopback_in)
			return EINVAL;

		ifs->ifs_hook6_loopback_out = (net_hook_register(
		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
		    ifs->ifs_ipfhook6_loop_out) == 0);
		if (!ifs->ifs_hook6_loopback_out)
			return EINVAL;

	} else if (!set && ifs->ifs_ipf_loopback) {
		ifs->ifs_ipf_loopback = 0;

		ifs->ifs_hook4_loopback_in =
		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
		if (ifs->ifs_hook4_loopback_in)
			return EBUSY;

		ifs->ifs_hook4_loopback_out =
		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
		if (ifs->ifs_hook4_loopback_out)
			return EBUSY;

		ifs->ifs_hook6_loopback_in =
		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
		if (ifs->ifs_hook6_loopback_in)
			return EBUSY;

		ifs->ifs_hook6_loopback_out =
		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
		if (ifs->ifs_hook6_loopback_out)
			return EBUSY;
	}
	return 0;
}


/*
 * Filter ioctl interface.
 */
/*ARGSUSED*/
int iplioctl(dev, cmd, data, mode, cp, rp)
dev_t dev;
int cmd;
#if SOLARIS2 >= 7
intptr_t data;
#else
int *data;
#endif
int mode;
cred_t *cp;
int *rp;
{
	int error = 0, tmp;
	friostat_t fio;
	minor_t unit;
	u_int enable;
	ipf_stack_t *ifs;
	zoneid_t zid;
	ipf_devstate_t *isp;

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
		dev, cmd, data, mode, cp, rp);
#endif
	unit = getminor(dev);

	isp = ddi_get_soft_state(ipf_state, unit);
	if (isp == NULL)
		return ENXIO;
	unit = isp->ipfs_minor;

	zid = crgetzoneid(cp);
	if (cmd == SIOCIPFZONESET) {
		if (zid == GLOBAL_ZONEID)
			return fr_setzoneid(isp, (caddr_t) data);
		return EACCES;
	}

        /*
	 * ipf_find_stack returns with a read lock on ifs_ipf_global
	 */
	ifs = ipf_find_stack(zid, isp);
	if (ifs == NULL)
		return ENXIO;

	if (ifs->ifs_fr_running <= 0) {
		if (unit != IPL_LOGIPF) {
			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			return EIO;
		}
		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			return EIO;
		}
	}

	if (ifs->ifs_fr_enable_active != 0) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return EBUSY;
	}

	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
			       curproc, ifs);
	if (error != -1) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return error;
	}
	error = 0;

	switch (cmd)
	{
	case SIOCFRENB :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data, (caddr_t)&enable,
				       sizeof(enable));
			if (error != 0) {
				error = EFAULT;
				break;
			}

			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			WRITE_ENTER(&ifs->ifs_ipf_global);

			/*
			 * We must recheck fr_enable_active here, since we've
			 * dropped ifs_ipf_global from R in order to get it
			 * exclusively.
			 */
			if (ifs->ifs_fr_enable_active == 0) {
				ifs->ifs_fr_enable_active = 1;
				error = fr_enableipf(ifs, enable);
				ifs->ifs_fr_enable_active = 0;
			}
		}
		break;
	case SIOCIPFSET :
		if (!(mode & FWRITE)) {
			error = EPERM;
			break;
		}
		/* FALLTHRU */
	case SIOCIPFGETNEXT :
	case SIOCIPFGET :
		error = fr_ipftune(cmd, (void *)data, ifs);
		break;
	case SIOCSETFF :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data,
				       (caddr_t)&ifs->ifs_fr_flags,
				       sizeof(ifs->ifs_fr_flags));
			if (error != 0)
				error = EFAULT;
		}
		break;
	case SIOCIPFLP :
		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
			       sizeof(tmp));
		if (error != 0)
			error = EFAULT;
		else
			error = fr_setipfloopback(tmp, ifs);
		break;
	case SIOCGETFF :
		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
				sizeof(ifs->ifs_fr_flags));
		if (error != 0)
			error = EFAULT;
		break;
	case SIOCFUNCL :
		error = fr_resolvefunc((void *)data);
		break;
	case SIOCINAFR :
	case SIOCRMAFR :
	case SIOCADAFR :
	case SIOCZRLST :
		if (!(mode & FWRITE))
			error = EPERM;
		else
			error = frrequest(unit, cmd, (caddr_t)data,
					  ifs->ifs_fr_active, 1, ifs);
		break;
	case SIOCINIFR :
	case SIOCRMIFR :
	case SIOCADIFR :
		if (!(mode & FWRITE))
			error = EPERM;
		else
			error = frrequest(unit, cmd, (caddr_t)data,
					  1 - ifs->ifs_fr_active, 1, ifs);
		break;
	case SIOCSWAPA :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			WRITE_ENTER(&ifs->ifs_ipf_mutex);
			bzero((char *)ifs->ifs_frcache,
			    sizeof (ifs->ifs_frcache));
			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
					(caddr_t)data,
					sizeof(ifs->ifs_fr_active));
			if (error != 0)
				error = EFAULT;
			else
				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
		}
		break;
	case SIOCGETFS :
		fr_getstat(&fio, ifs);
		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
		break;
	case SIOCFRZST :
		if (!(mode & FWRITE))
			error = EPERM;
		else
			error = fr_zerostats((caddr_t)data, ifs);
		break;
	case	SIOCIPFFL :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
				       sizeof(tmp));
			if (!error) {
				tmp = frflush(unit, 4, tmp, ifs);
				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
						sizeof(tmp));
				if (error != 0)
					error = EFAULT;
			} else
				error = EFAULT;
		}
		break;
#ifdef USE_INET6
	case	SIOCIPFL6 :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
				       sizeof(tmp));
			if (!error) {
				tmp = frflush(unit, 6, tmp, ifs);
				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
						sizeof(tmp));
				if (error != 0)
					error = EFAULT;
			} else
				error = EFAULT;
		}
		break;
#endif
	case SIOCSTLCK :
		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
		if (error == 0) {
			ifs->ifs_fr_state_lock = tmp;
			ifs->ifs_fr_nat_lock = tmp;
			ifs->ifs_fr_frag_lock = tmp;
			ifs->ifs_fr_auth_lock = tmp;
		} else
			error = EFAULT;
	break;
#ifdef	IPFILTER_LOG
	case	SIOCIPFFB :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			tmp = ipflog_clear(unit, ifs);
			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
				       sizeof(tmp));
			if (error)
				error = EFAULT;
		}
		break;
#endif /* IPFILTER_LOG */
	case SIOCFRSYN :
		if (!(mode & FWRITE))
			error = EPERM;
		else {
			RWLOCK_EXIT(&ifs->ifs_ipf_global);
			WRITE_ENTER(&ifs->ifs_ipf_global);

			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
			fr_nataddrsync(0, NULL, NULL, ifs);
			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
			error = 0;
		}
		break;
	case SIOCGFRST :
		error = fr_outobj((void *)data, fr_fragstats(ifs),
				  IPFOBJ_FRAGSTAT);
		break;
	case FIONREAD :
#ifdef	IPFILTER_LOG
		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];

		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
		if (error != 0)
			error = EFAULT;
#endif
		break;
	case SIOCIPFITER :
		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
				       curproc, ifs);
		break;

	case SIOCGENITER :
		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
					curproc, ifs);
		break;

	case SIOCIPFDELTOK :
		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
		if (error != 0) {
			error = EFAULT;
		} else {
			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
		}
		break;

	default :
#ifdef	IPFDEBUG
		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
			cmd, (void *)data);
#endif
		error = EINVAL;
		break;
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_global);
	return error;
}


static int fr_enableipf(ifs, enable)
ipf_stack_t *ifs;
int enable;
{
	int error;

	if (!enable) {
		error = ipldetach(ifs);
		if (error == 0)
			ifs->ifs_fr_running = -1;
		return error;
	}

	if (ifs->ifs_fr_running > 0)
		return 0;

	error = iplattach(ifs);
	if (error == 0) {
		if (ifs->ifs_fr_timer_id == NULL) {
			int hz = drv_usectohz(500000);

			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
						       (void *)ifs,
						       hz);
		}
		ifs->ifs_fr_running = 1;
	} else {
		(void) ipldetach(ifs);
	}
	return error;
}


phy_if_t get_unit(name, v, ifs)
char *name;
int v;
ipf_stack_t *ifs;
{
	net_handle_t nif;

  	if (v == 4)
 		nif = ifs->ifs_ipf_ipv4;
  	else if (v == 6)
 		nif = ifs->ifs_ipf_ipv6;
  	else
 		return 0;

 	return (net_phylookup(nif, name));
}

/*
 * routines below for saving IP headers to buffer
 */
/*ARGSUSED*/
int iplopen(devp, flags, otype, cred)
dev_t *devp;
int flags, otype;
cred_t *cred;
{
	ipf_devstate_t *isp;
	minor_t min = getminor(*devp);
	minor_t minor;

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
#endif
	if (!(otype & OTYP_CHR))
		return ENXIO;

	if (IPL_LOGMAX < min)
		return ENXIO;

	minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
		vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
		return ENXIO;
	}

	*devp = makedevice(getmajor(*devp), minor);
	isp = ddi_get_soft_state(ipf_state, minor);
	VERIFY(isp != NULL);

	isp->ipfs_minor = min;
	isp->ipfs_zoneid = IPFS_ZONE_UNSET;

	return 0;
}


/*ARGSUSED*/
int iplclose(dev, flags, otype, cred)
dev_t dev;
int flags, otype;
cred_t *cred;
{
	minor_t	min = getminor(dev);

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
#endif

	if (IPL_LOGMAX < min)
		return ENXIO;

	ddi_soft_state_free(ipf_state, min);
	vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);

	return 0;
}

#ifdef	IPFILTER_LOG
/*
 * iplread/ipllog
 * both of these must operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistancy to appear in
 * the filter lists.
 */
/*ARGSUSED*/
int iplread(dev, uio, cp)
dev_t dev;
register struct uio *uio;
cred_t *cp;
{
	ipf_stack_t *ifs;
	int ret;
	minor_t unit;
	ipf_devstate_t *isp;

	unit = getminor(dev);
	isp = ddi_get_soft_state(ipf_state, unit);
	if (isp == NULL)
		return ENXIO;
	unit = isp->ipfs_minor;


        /*
	 * ipf_find_stack returns with a read lock on ifs_ipf_global
	 */
	ifs = ipf_find_stack(crgetzoneid(cp), isp);
	if (ifs == NULL)
		return ENXIO;

# ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif

	if (ifs->ifs_fr_running < 1) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return EIO;
	}

# ifdef	IPFILTER_SYNC
	if (unit == IPL_LOGSYNC) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return ipfsync_read(uio);
	}
# endif

	ret = ipflog_read(unit, uio, ifs);
	RWLOCK_EXIT(&ifs->ifs_ipf_global);
	return ret;
}
#endif /* IPFILTER_LOG */


/*
 * iplread/ipllog
 * both of these must operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistancy to appear in
 * the filter lists.
 */
int iplwrite(dev, uio, cp)
dev_t dev;
register struct uio *uio;
cred_t *cp;
{
	ipf_stack_t *ifs;
	minor_t unit;
	ipf_devstate_t *isp;

	unit = getminor(dev);
	isp = ddi_get_soft_state(ipf_state, unit);
	if (isp == NULL)
		return ENXIO;
	unit = isp->ipfs_minor;

        /*
	 * ipf_find_stack returns with a read lock on ifs_ipf_global
	 */
	ifs = ipf_find_stack(crgetzoneid(cp), isp);
	if (ifs == NULL)
		return ENXIO;

#ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
#endif

	if (ifs->ifs_fr_running < 1) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return EIO;
	}

#ifdef	IPFILTER_SYNC
	if (getminor(dev) == IPL_LOGSYNC) {
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return ipfsync_write(uio);
	}
#endif /* IPFILTER_SYNC */
	dev = dev;	/* LINT */
	uio = uio;	/* LINT */
	cp = cp;	/* LINT */
	RWLOCK_EXIT(&ifs->ifs_ipf_global);
	return ENXIO;
}


/*
 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
 * requires a large amount of setting up and isn't any more efficient.
 */
int fr_send_reset(fin)
fr_info_t *fin;
{
	tcphdr_t *tcp, *tcp2;
	int tlen, hlen;
	mblk_t *m;
#ifdef	USE_INET6
	ip6_t *ip6;
#endif
	ip_t *ip;

	tcp = fin->fin_dp;
	if (tcp->th_flags & TH_RST)
		return -1;

#ifndef	IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		return -1;
#endif

	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
#ifdef	USE_INET6
	if (fin->fin_v == 6)
		hlen = sizeof(ip6_t);
	else
#endif
		hlen = sizeof(ip_t);
	hlen += sizeof(*tcp2);
	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
		return -1;

	m->b_rptr += 64;
	MTYPE(m) = M_DATA;
	m->b_wptr = m->b_rptr + hlen;
	ip = (ip_t *)m->b_rptr;
	bzero((char *)ip, hlen);
	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
	tcp2->th_dport = tcp->th_sport;
	tcp2->th_sport = tcp->th_dport;
	if (tcp->th_flags & TH_ACK) {
		tcp2->th_seq = tcp->th_ack;
		tcp2->th_flags = TH_RST;
	} else {
		tcp2->th_ack = ntohl(tcp->th_seq);
		tcp2->th_ack += tlen;
		tcp2->th_ack = htonl(tcp2->th_ack);
		tcp2->th_flags = TH_RST|TH_ACK;
	}
	tcp2->th_off = sizeof(struct tcphdr) >> 2;

	ip->ip_v = fin->fin_v;
#ifdef	USE_INET6
	if (fin->fin_v == 6) {
		ip6 = (ip6_t *)m->b_rptr;
		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
		ip6->ip6_src = fin->fin_dst6.in6;
		ip6->ip6_dst = fin->fin_src6.in6;
		ip6->ip6_plen = htons(sizeof(*tcp));
		ip6->ip6_nxt = IPPROTO_TCP;
		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
	} else
#endif
	{
		ip->ip_src.s_addr = fin->fin_daddr;
		ip->ip_dst.s_addr = fin->fin_saddr;
		ip->ip_id = fr_nextipid(fin);
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_p = IPPROTO_TCP;
		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
		ip->ip_tos = fin->fin_ip->ip_tos;
		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
	}
	return fr_send_ip(fin, m, &m);
}

/*
 * Function:	fr_send_ip
 * Returns:	 0: success
 *		-1: failed
 * Parameters:
 *	fin: packet information
 *	m: the message block where ip head starts
 *
 * Send a new packet through the IP stack.
 *
 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
 * function).
 *
 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
 * in by this function.
 *
 * All other portions of the packet must be in on-the-wire format.
 */
/*ARGSUSED*/
static int fr_send_ip(fin, m, mpp)
fr_info_t *fin;
mblk_t *m, **mpp;
{
	qpktinfo_t qpi, *qpip;
	fr_info_t fnew;
	ip_t *ip;
	int i, hlen;
	ipf_stack_t *ifs = fin->fin_ifs;

	ip = (ip_t *)m->b_rptr;
	bzero((char *)&fnew, sizeof(fnew));

#ifdef	USE_INET6
	if (fin->fin_v == 6) {
		ip6_t *ip6;

		ip6 = (ip6_t *)ip;
		ip6->ip6_vfc = 0x60;
		ip6->ip6_hlim = 127;
		fnew.fin_v = 6;
		hlen = sizeof(*ip6);
		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
	} else
#endif
	{
		fnew.fin_v = 4;
#if SOLARIS2 >= 10
		ip->ip_ttl = 255;
		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
			ip->ip_off = htons(IP_DF);
#else
		if (ip_ttl_ptr != NULL)
			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
		else
			ip->ip_ttl = 63;
		if (ip_mtudisc != NULL)
			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
		else
			ip->ip_off = htons(IP_DF);
#endif
		/*
		 * The dance with byte order and ip_len/ip_off is because in
		 * fr_fastroute, it expects them to be in host byte order but
		 * ipf_cksum expects them to be in network byte order.
		 */
		ip->ip_len = htons(ip->ip_len);
		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);
		hlen = sizeof(*ip);
		fnew.fin_plen = ip->ip_len;
	}

	qpip = fin->fin_qpi;
	qpi.qpi_off = 0;
	qpi.qpi_ill = qpip->qpi_ill;
	qpi.qpi_m = m;
	qpi.qpi_data = ip;
	fnew.fin_qpi = &qpi;
	fnew.fin_ifp = fin->fin_ifp;
	fnew.fin_flx = FI_NOCKSUM | FI_GENERATED;
	fnew.fin_m = m;
	fnew.fin_qfm = m;
	fnew.fin_ip = ip;
	fnew.fin_mp = mpp;
	fnew.fin_hlen = hlen;
	fnew.fin_dp = (char *)ip + hlen;
	fnew.fin_ifs = fin->fin_ifs;
	(void) fr_makefrip(hlen, ip, &fnew);

	i = fr_fastroute(m, mpp, &fnew, NULL);
	return i;
}


int fr_send_icmp_err(type, fin, dst)
int type;
fr_info_t *fin;
int dst;
{
	struct in_addr dst4;
	struct icmp *icmp;
	qpktinfo_t *qpi;
	int hlen, code;
	phy_if_t phy;
	u_short sz;
#ifdef	USE_INET6
	mblk_t *mb;
#endif
	mblk_t *m;
#ifdef	USE_INET6
	ip6_t *ip6;
#endif
	ip_t *ip;
	ipf_stack_t *ifs = fin->fin_ifs;

	if ((type < 0) || (type > ICMP_MAXTYPE))
		return -1;

	code = fin->fin_icode;
#ifdef USE_INET6
	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
		return -1;
#endif

#ifndef	IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		return -1;
#endif

	qpi = fin->fin_qpi;

#ifdef	USE_INET6
	mb = fin->fin_qfm;

	if (fin->fin_v == 6) {
		sz = sizeof(ip6_t);
		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
		hlen = sizeof(ip6_t);
		type = icmptoicmp6types[type];
		if (type == ICMP6_DST_UNREACH)
			code = icmptoicmp6unreach[code];
	} else
#endif
	{
		if ((fin->fin_p == IPPROTO_ICMP) &&
		    !(fin->fin_flx & FI_SHORT))
			switch (ntohs(fin->fin_data[0]) >> 8)
			{
			case ICMP_ECHO :
			case ICMP_TSTAMP :
			case ICMP_IREQ :
			case ICMP_MASKREQ :
				break;
			default :
				return 0;
			}

		sz = sizeof(ip_t) * 2;
		sz += 8;		/* 64 bits of data */
		hlen = sizeof(ip_t);
	}

	sz += offsetof(struct icmp, icmp_ip);
	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
		return -1;
	MTYPE(m) = M_DATA;
	m->b_rptr += 64;
	m->b_wptr = m->b_rptr + sz;
	bzero((char *)m->b_rptr, (size_t)sz);
	ip = (ip_t *)m->b_rptr;
	ip->ip_v = fin->fin_v;
	icmp = (struct icmp *)(m->b_rptr + hlen);
	icmp->icmp_type = type & 0xff;
	icmp->icmp_code = code & 0xff;
	phy = (phy_if_t)qpi->qpi_ill;
	if (type == ICMP_UNREACH && (phy != 0) &&
	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );

#ifdef	USE_INET6
	if (fin->fin_v == 6) {
		struct in6_addr dst6;
		int csz;

		if (dst == 0) {
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
				       (void *)&dst6, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else
			dst6 = fin->fin_dst6.in6;

		csz = sz;
		sz -= sizeof(ip6_t);
		ip6 = (ip6_t *)m->b_rptr;
		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
		ip6->ip6_plen = htons((u_short)sz);
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		ip6->ip6_src = dst6;
		ip6->ip6_dst = fin->fin_src6.in6;
		sz -= offsetof(struct icmp, icmp_ip);
		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
		icmp->icmp_cksum = csz - sizeof(ip6_t);
	} else
#endif
	{
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_p = IPPROTO_ICMP;
		ip->ip_id = fin->fin_ip->ip_id;
		ip->ip_tos = fin->fin_ip->ip_tos;
		ip->ip_len = (u_short)sz;
		if (dst == 0) {
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
				       (void *)&dst4, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else {
			dst4 = fin->fin_dst;
		}
		ip->ip_src = dst4;
		ip->ip_dst = fin->fin_src;
		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
		      sizeof(*fin->fin_ip));
		bcopy((char *)fin->fin_ip + fin->fin_hlen,
		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
					     sz - sizeof(ip_t));
	}

	/*
	 * Need to exit out of these so we don't recursively call rw_enter
	 * from fr_qout.
	 */
	return fr_send_ip(fin, m, &m);
}

#include <sys/time.h>
#include <sys/varargs.h>

#ifndef _KERNEL
#include <stdio.h>
#endif

/*
 * Return the first IP Address associated with an interface
 * For IPv6, we walk through the list of logical interfaces and return
 * the address of the first one that isn't a link-local interface.
 * We can't assume that it is :1 because another link-local address
 * may have been assigned there.
 */
/*ARGSUSED*/
int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
int v, atype;
void *ifptr;
struct in_addr  *inp, *inpmask;
ipf_stack_t *ifs;
{
	struct sockaddr_in6 v6addr[2];
	struct sockaddr_in v4addr[2];
	net_ifaddr_t type[2];
	net_handle_t net_data;
	phy_if_t phyif;
	void *array;

	switch (v)
	{
	case 4:
		net_data = ifs->ifs_ipf_ipv4;
		array = v4addr;
		break;
	case 6:
		net_data = ifs->ifs_ipf_ipv6;
		array = v6addr;
		break;
	default:
		net_data = NULL;
		break;
	}

	if (net_data == NULL)
		return -1;

	phyif = (phy_if_t)ifptr;

	switch (atype)
	{
	case FRI_PEERADDR :
		type[0] = NA_PEER;
		break;

	case FRI_BROADCAST :
		type[0] = NA_BROADCAST;
		break;

	default :
		type[0] = NA_ADDRESS;
		break;
	}

	type[1] = NA_NETMASK;

	if (v == 6) {
		lif_if_t idx = 0;

		do {
			idx = net_lifgetnext(net_data, phyif, idx);
			if (net_getlifaddr(net_data, phyif, idx, 2, type,
					   array) < 0)
				return -1;
			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
				break;
		} while (idx != 0);

		if (idx == 0)
			return -1;

		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
					inp, inpmask);
	}

	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
		return -1;

	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
}


u_32_t fr_newisn(fin)
fr_info_t *fin;
{
	static int iss_seq_off = 0;
	u_char hash[16];
	u_32_t newiss;
	MD5_CTX ctx;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Compute the base value of the ISS.  It is a hash
	 * of (saddr, sport, daddr, dport, secret).
	 */
	MD5Init(&ctx);

	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
		  sizeof(fin->fin_fi.fi_src));
	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
		  sizeof(fin->fin_fi.fi_dst));
	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));

	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));

	MD5Final(hash, &ctx);

	bcopy(hash, &newiss, sizeof(newiss));

	/*
	 * Now increment our "timer", and add it in to
	 * the computed value.
	 *
	 * XXX Use `addin'?
	 * XXX TCP_ISSINCR too large to use?
	 */
	iss_seq_off += 0x00010000;
	newiss += iss_seq_off;
	return newiss;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_nextipid                                                 */
/* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* Returns the next IPv4 ID to use for this packet.                         */
/* ------------------------------------------------------------------------ */
u_short fr_nextipid(fin)
fr_info_t *fin;
{
	static u_short ipid = 0;
	u_short id;
	ipf_stack_t *ifs = fin->fin_ifs;

	MUTEX_ENTER(&ifs->ifs_ipf_rw);
	if (fin->fin_pktnum != 0) {
		id = fin->fin_pktnum & 0xffff;
	} else {
		id = ipid++;
	}
	MUTEX_EXIT(&ifs->ifs_ipf_rw);

	return id;
}


#ifndef IPFILTER_CKSUM
/* ARGSUSED */
#endif
INLINE void fr_checkv4sum(fin)
fr_info_t *fin;
{
#ifdef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		fin->fin_flx |= FI_BAD;
#endif
}


#ifdef USE_INET6
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */


#if (SOLARIS2 < 7)
void fr_slowtimer()
#else
/*ARGSUSED*/
void fr_slowtimer __P((void *arg))
#endif
{
	ipf_stack_t *ifs = arg;

	READ_ENTER(&ifs->ifs_ipf_global);
	if (ifs->ifs_fr_running != 1) {
		ifs->ifs_fr_timer_id = NULL;
		RWLOCK_EXIT(&ifs->ifs_ipf_global);
		return;
	}
	ipf_expiretokens(ifs);
	fr_fragexpire(ifs);
	fr_timeoutstate(ifs);
	fr_natexpire(ifs);
	fr_authexpire(ifs);
	ifs->ifs_fr_ticks++;
	if (ifs->ifs_fr_running == 1)
		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
		    drv_usectohz(500000));
	else
		ifs->ifs_fr_timer_id = NULL;
	RWLOCK_EXIT(&ifs->ifs_ipf_global);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_pullup                                                   */
/* Returns:     NULL == pullup failed, else pointer to protocol header      */
/* Parameters:  m(I)   - pointer to buffer where data packet starts         */
/*              fin(I) - pointer to packet information                      */
/*              len(I) - number of bytes to pullup                          */
/*                                                                          */
/* Attempt to move at least len bytes (from the start of the buffer) into a */
/* single buffer for ease of access.  Operating system native functions are */
/* used to manage buffers - if necessary.  If the entire packet ends up in  */
/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
/* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
/* and ONLY if the pullup succeeds.                                         */
/*                                                                          */
/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
/* of buffers that starts at *fin->fin_mp.                                  */
/* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	ip = (char *)fin->fin_ip;
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	ipoff = fin->fin_ipoff;
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	if (M_LEN(m) < len + ipoff) {

		/*
		 * pfil_precheck ensures the IP header is on a 32bit
		 * aligned address so simply fail if that isn't currently
		 * the case (should never happen).
		 */
		int inc = 0;

		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		m1 = m;
		m2 = m->b_prev;

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Need to preserve checksum information by copying them
		 * to newmp which heads the pulluped message.
		 */
		mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		mac_hcksum_set(m, start, stuff, end, value, flags);

		m->b_prev = m2;
		m->b_rptr += inc;
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}


/*
 * Function:	fr_verifysrc
 * Returns:	int (really boolean)
 * Parameters:	fin - packet information
 *
 * Check whether the packet has a valid source address for the interface on
 * which the packet arrived, implementing the "fr_chksrc" feature.
 * Returns true iff the packet's source address is valid.
 */
int fr_verifysrc(fin)
fr_info_t *fin;
{
	net_handle_t net_data_p;
	phy_if_t phy_ifdata_routeto;
	struct sockaddr	sin;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (fin->fin_v == 4) {
		net_data_p = ifs->ifs_ipf_ipv4;
	} else if (fin->fin_v == 6) {
		net_data_p = ifs->ifs_ipf_ipv6;
	} else {
		return (0);
	}

	/* Get the index corresponding to the if name */
	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);

	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
}

/*
 * Return true only if forwarding is enabled on the interface.
 */
static int
fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
{
	lif_if_t lif;

	for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
	    lif = net_lifgetnext(ndp, phyif, lif)) {
		int res;
		uint64_t flags;

		res = net_getlifflags(ndp, phyif, lif, &flags);
		if (res != 0)
			return (0);
		if (flags & IFF_ROUTER)
			return (1);
	}

	return (0);
}

/*
 * Function:	fr_fastroute
 * Returns:	 0: success;
 *		-1: failed
 * Parameters:
 *	mb: the message block where ip head starts
 *	mpp: the pointer to the pointer of the orignal
 *		packet message
 *	fin: packet information
 *	fdp: destination interface information
 *	if it is NULL, no interface information provided.
 *
 * This function is for fastroute/to/dup-to rules. It calls
 * pfil_make_lay2_packet to search route, make lay-2 header
 * ,and identify output queue for the IP packet.
 * The destination address depends on the following conditions:
 * 1: for fastroute rule, fdp is passed in as NULL, so the
 *	destination address is the IP Packet's destination address
 * 2: for to/dup-to rule, if an ip address is specified after
 *	the interface name, this address is the as destination
 *	address. Otherwise IP Packet's destination address is used
 */
int fr_fastroute(mb, mpp, fin, fdp)
mblk_t *mb, **mpp;
fr_info_t *fin;
frdest_t *fdp;
{
        net_handle_t net_data_p;
	net_inject_t *inj;
	mblk_t *mp = NULL;
	frentry_t *fr = fin->fin_fr;
	qpktinfo_t *qpi;
	ip_t *ip;

	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct sockaddr *sinp;
	ipf_stack_t *ifs = fin->fin_ifs;
#ifndef	sparc
	u_short __iplen, __ipoff;
#endif

	if (fin->fin_v == 4) {
		net_data_p = ifs->ifs_ipf_ipv4;
	} else if (fin->fin_v == 6) {
		net_data_p = ifs->ifs_ipf_ipv6;
	} else {
		return (-1);
	}

	/* Check the src here, fin_ifp is the src interface. */
	if (!(fin->fin_flx & FI_GENERATED) &&
	    !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p)) {
		return (-1);
	}

	inj = net_inject_alloc(NETINFO_VERSION);
	if (inj == NULL)
		return -1;

	ip = fin->fin_ip;
	qpi = fin->fin_qpi;

	/*
	 * If this is a duplicate mblk then we want ip to point at that
	 * data, not the original, if and only if it is already pointing at
	 * the current mblk data.
	 *
	 * Otherwise, if it's not a duplicate, and we're not already pointing
	 * at the current mblk data, then we want to ensure that the data
	 * points at ip.
	 */

	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
		ip = (ip_t *)mb->b_rptr;
	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
		qpi->qpi_m->b_rptr = (uchar_t *)ip;
		qpi->qpi_off = 0;
	}

	/*
	 * If there is another M_PROTO, we don't want it
	 */
	if (*mpp != mb) {
		mp = unlinkb(*mpp);
		freeb(*mpp);
		*mpp = mp;
	}

	sinp = (struct sockaddr *)&inj->ni_addr;
	sin = (struct sockaddr_in *)sinp;
	sin6 = (struct sockaddr_in6 *)sinp;
	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
	inj->ni_packet = mb;

	/*
	 * In case we're here due to "to <if>" being used with
	 * "keep state", check that we're going in the correct
	 * direction.
	 */
	if (fdp != NULL) {
		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
			goto bad_fastroute;
		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
		if (fin->fin_v == 4) {
			sin->sin_addr = fdp->fd_ip;
		} else {
			sin6->sin6_addr = fdp->fd_ip6.in6;
		}
	} else {
		if (fin->fin_v == 4) {
			sin->sin_addr = ip->ip_dst;
		} else {
			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
		}
		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
	}

	/* we're checking the destination here */
	if (!(fin->fin_flx & FI_GENERATED) &&
	    !fr_forwarding_enabled(inj->ni_physical, net_data_p)) {
		goto bad_fastroute;
	}

	/*
	 * Clear the hardware checksum flags from packets that we are doing
	 * input processing on as leaving them set will cause the outgoing
	 * NIC (if it supports hardware checksum) to calculate them anew,
	 * using the old (correct) checksums as the pseudo value to start
	 * from.
	 */
	if (fin->fin_out == 0) {
		DB_CKSUMFLAGS(mb) = 0;
	}

	*mpp = mb;

	if (fin->fin_out == 0) {
		void *saveifp;
		u_32_t pass;

		saveifp = fin->fin_ifp;
		fin->fin_ifp = (void *)inj->ni_physical;
		fin->fin_flx &= ~FI_STATE;
		fin->fin_out = 1;
		(void) fr_acctpkt(fin, &pass);
		fin->fin_fr = NULL;
		if (!fr || !(fr->fr_flags & FR_RETMASK))
			(void) fr_checkstate(fin, &pass);
		if (fr_checknatout(fin, NULL) == -1)
			goto bad_fastroute;
		fin->fin_out = 0;
		fin->fin_ifp = saveifp;
	}
#ifndef	sparc
	if (fin->fin_v == 4) {
		__iplen = (u_short)ip->ip_len,
		__ipoff = (u_short)ip->ip_off;

		ip->ip_len = htons(__iplen);
		ip->ip_off = htons(__ipoff);
	}
#endif

	if (net_data_p) {
		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
			net_inject_free(inj);
			return (-1);
		}
	}

	ifs->ifs_fr_frouteok[0]++;
	net_inject_free(inj);
	return 0;
bad_fastroute:
	net_inject_free(inj);
	freemsg(mb);
	ifs->ifs_fr_frouteok[1]++;
	return -1;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook4_out                                               */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* Calling ipf_hook.                                                        */
/* ------------------------------------------------------------------------ */
/*ARGSUSED*/
int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook(info, 1, 0, arg);
}
/*ARGSUSED*/
int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook6(info, 1, 0, arg);
}

/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook4_in                                                */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* Calling ipf_hook.                                                        */
/* ------------------------------------------------------------------------ */
/*ARGSUSED*/
int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook(info, 0, 0, arg);
}
/*ARGSUSED*/
int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook6(info, 0, 0, arg);
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook4_loop_out                                          */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* Calling ipf_hook.                                                        */
/* ------------------------------------------------------------------------ */
/*ARGSUSED*/
int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook(info, 1, FI_NOCKSUM, arg);
}
/*ARGSUSED*/
int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
}

/* Static constants used by ipf_hook_ether */
static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };

/* ------------------------------------------------------------------------ */
/* Function:	ipf_hook_ether                                              */
/* Returns:	int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:	token(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* The ipf_hook_ether hook is currently private to illumos.  It represents  */
/* a layer 2 datapath generally used by virtual machines.  Currently the    */
/* hook is only used by the viona driver to pass along L2 frames for        */
/* inspection.  It requires that the L2 ethernet header is contained within */
/* a single dblk_t (however layers above the L2 header have no restrctions  */
/* in ipf).  ipf does not currently support filtering on L2 fields (e.g.    */
/* filtering on a MAC address or ethertype), however virtual machines do    */
/* not have native IP stack instances where ipf traditionally hooks in.     */
/* Instead this entry point is used to determine if the packet is unicast,  */
/* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the   */
/* traditional ip hooks for filtering.  Non IPv4 or non IPv6 packets are    */
/* not subject to examination.                                              */
/* ------------------------------------------------------------------------ */
int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
    boolean_t out)
{
	struct ether_header *ethp;
	hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
	mblk_t *mp;
	size_t offset, len;
	uint16_t etype;
	boolean_t v6;

	/*
	 * viona will only pass us mblks with the L2 header contained in a
	 * single data block.
	 */
	mp = *hpe->hpe_mp;
	len = MBLKL(mp);

	VERIFY3S(len, >=, sizeof (struct ether_header));

	ethp = (struct ether_header *)mp->b_rptr;
	if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evh =
		    (struct ether_vlan_header *)ethp;

		VERIFY3S(len, >=, sizeof (struct ether_vlan_header));

		etype = ntohs(evh->ether_type);
		offset = sizeof (*evh);
	} else {
		offset = sizeof (*ethp);
	}

	/*
	 * ipf only support filtering IPv4 and IPv6.  Ignore other types.
	 */
	if (etype == ETHERTYPE_IP)
		v6 = B_FALSE;
	else if (etype == ETHERTYPE_IPV6)
		v6 = B_TRUE;
	else
		return (0);

	if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
		hpe->hpe_flags |= HPE_BROADCAST;
	else if (bcmp(ipf_eth_ipv4_mcast, ethp,
	    sizeof (ipf_eth_ipv4_mcast)) == 0)
		hpe->hpe_flags |= HPE_MULTICAST;
	else if (bcmp(ipf_eth_ipv6_mcast, ethp,
	    sizeof (ipf_eth_ipv6_mcast)) == 0)
		hpe->hpe_flags |= HPE_MULTICAST;

	/* Find the start of the IPv4 or IPv6 header */
	for (; offset >= len; len = MBLKL(mp)) {
		offset -= len;
		mp = mp->b_cont;
		if (mp == NULL) {
			freemsg(*hpe->hpe_mp);
			*hpe->hpe_mp = NULL;
			return (-1);
		}
	}
	hpe->hpe_mb = mp;
	hpe->hpe_hdr = mp->b_rptr + offset;

	return (v6 ? ipf_hook6(info, out, 0, arg) :
	    ipf_hook(info, out, 0, arg));
}

/* ------------------------------------------------------------------------ */
/* Function:    ipf_hookviona_{in,out}                                      */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* The viona hooks are private hooks to illumos. They represents a layer 2  */
/* datapath generally used to implement virtual machines.                   */
/* along L2 packets.                                                        */
/*                                                                          */
/* They end up calling the appropriate traditional ip hooks.                */
/* ------------------------------------------------------------------------ */
int
ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
{
	return (ipf_hook_ether(token, info, arg, B_FALSE));
}

int
ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
{
	return (ipf_hook_ether(token, info, arg, B_TRUE));
}

/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook4_loop_in                                           */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to hook information for firewalling  */
/*                                                                          */
/* Calling ipf_hook.                                                        */
/* ------------------------------------------------------------------------ */
/*ARGSUSED*/
int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook(info, 0, FI_NOCKSUM, arg);
}
/*ARGSUSED*/
int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
{
	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
}

/* ------------------------------------------------------------------------ */
/* Function:    ipf_hook                                                    */
/* Returns:     int - 0 == packet ok, else problem, free packet if not done */
/* Parameters:  info(I)      - pointer to hook information for firewalling  */
/*              out(I)       - whether packet is going in or out            */
/*              loopback(I)  - whether packet is a loopback packet or not   */
/*                                                                          */
/* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
/* parameters out of the info structure and forms them up to be useful for  */
/* calling ipfilter.                                                        */
/* ------------------------------------------------------------------------ */
int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
{
	hook_pkt_event_t *fw;
	ipf_stack_t *ifs;
	qpktinfo_t qpi;
	int rval, hlen;
	u_short swap;
	phy_if_t phy;
	ip_t *ip;

	ifs = arg;
	fw = (hook_pkt_event_t *)info;

	ASSERT(fw != NULL);
	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;

	ip = fw->hpe_hdr;
	swap = ntohs(ip->ip_len);
	ip->ip_len = swap;
	swap = ntohs(ip->ip_off);
	ip->ip_off = swap;
	hlen = IPH_HDR_LENGTH(ip);

	qpi.qpi_m = fw->hpe_mb;
	qpi.qpi_data = fw->hpe_hdr;
	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
	qpi.qpi_ill = (void *)phy;
	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
	if (qpi.qpi_flags)
		qpi.qpi_flags |= FI_MBCAST;
	qpi.qpi_flags |= loopback;

	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
	    &qpi, fw->hpe_mp, ifs);

	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
	if (rval == 0 && *(fw->hpe_mp) == NULL)
		rval = 1;

	/* Notify IP the packet mblk_t and IP header pointers. */
	fw->hpe_mb = qpi.qpi_m;
	fw->hpe_hdr = qpi.qpi_data;
	if (rval == 0) {
		ip = qpi.qpi_data;
		swap = ntohs(ip->ip_len);
		ip->ip_len = swap;
		swap = ntohs(ip->ip_off);
		ip->ip_off = swap;
	}
	return rval;

}
int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
{
	hook_pkt_event_t *fw;
	int rval, hlen;
	qpktinfo_t qpi;
	phy_if_t phy;

	fw = (hook_pkt_event_t *)info;

	ASSERT(fw != NULL);
	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;

	hlen = sizeof (ip6_t);

	qpi.qpi_m = fw->hpe_mb;
	qpi.qpi_data = fw->hpe_hdr;
	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
	qpi.qpi_ill = (void *)phy;
	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
	if (qpi.qpi_flags)
		qpi.qpi_flags |= FI_MBCAST;
	qpi.qpi_flags |= loopback;

	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
	    &qpi, fw->hpe_mp, arg);

	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
	if (rval == 0 && *(fw->hpe_mp) == NULL)
		rval = 1;

	/* Notify IP the packet mblk_t and IP header pointers. */
	fw->hpe_mb = qpi.qpi_m;
	fw->hpe_hdr = qpi.qpi_data;
	return rval;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_nic_event_v4                                            */
/* Returns:     int - 0 == no problems encountered                          */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to information about a NIC event     */
/*                                                                          */
/* Function to receive asynchronous NIC events from IP                      */
/* ------------------------------------------------------------------------ */
/*ARGSUSED*/
int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
{
	struct sockaddr_in *sin;
	hook_nic_event_t *hn;
	ipf_stack_t *ifs = arg;
	void *new_ifp = NULL;

	if (ifs->ifs_fr_running <= 0)
		return (0);

	hn = (hook_nic_event_t *)info;

	switch (hn->hne_event)
	{
	case NE_PLUMB :
		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
		       ifs);
		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
			      hn->hne_data, ifs);
		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
			     hn->hne_data, ifs);
		break;

	case NE_UNPLUMB :
		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
			      ifs);
		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
		break;

	case NE_ADDRESS_CHANGE :
		/*
		 * We only respond to events for logical interface 0 because
		 * IPFilter only uses the first address given to a network
		 * interface.  We check for hne_lif==1 because the netinfo
		 * code maps adds 1 to the lif number so that it can return
		 * 0 to indicate "no more lifs" when walking them.
		 */
		if (hn->hne_lif == 1) {
			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
			    ifs);
			sin = hn->hne_data;
			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
			    ifs);
		}
		break;

#if SOLARIS2 >= 10
	case NE_IFINDEX_CHANGE :
		WRITE_ENTER(&ifs->ifs_ipf_mutex);

		if (hn->hne_data != NULL) {
			/*
			 * The netinfo passes interface index as int (hne_data should be
			 * handled as a pointer to int), which is always 32bit. We need to
			 * convert it to void pointer here, since interfaces are
			 * represented as pointers to void in IPF. The pointers are 64 bits
			 * long on 64bit platforms. Doing something like
			 *	(void *)((int) x)
			 * will throw warning:
			 *   "cast to pointer from integer of different size"
			 * during 64bit compilation.
			 *
			 * The line below uses (size_t) to typecast int to
			 * size_t, which might be 64bit/32bit (depending
			 * on architecture). Once we have proper 64bit/32bit
			 * type (size_t), we can safely convert it to void pointer.
			 */
			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
		break;
#endif

	default :
		break;
	}

	return 0;
}


/* ------------------------------------------------------------------------ */
/* Function:    ipf_nic_event_v6                                            */
/* Returns:     int - 0 == no problems encountered                          */
/* Parameters:  event(I)     - pointer to event                             */
/*              info(I)      - pointer to information about a NIC event     */
/*                                                                          */
/* Function to receive asynchronous NIC events from IP                      */
/* ------------------------------------------------------------------------ */
/*ARGSUSED*/
int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
{
	struct sockaddr_in6 *sin6;
	hook_nic_event_t *hn;
	ipf_stack_t *ifs = arg;
	void *new_ifp = NULL;

	if (ifs->ifs_fr_running <= 0)
		return (0);

	hn = (hook_nic_event_t *)info;

	switch (hn->hne_event)
	{
	case NE_PLUMB :
		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
		       hn->hne_data, ifs);
		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
			      hn->hne_data, ifs);
		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
			     hn->hne_data, ifs);
		break;

	case NE_UNPLUMB :
		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
			      ifs);
		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
		break;

	case NE_ADDRESS_CHANGE :
		if (hn->hne_lif == 1) {
			sin6 = hn->hne_data;
			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
				       ifs);
		}
		break;

#if SOLARIS2 >= 10
	case NE_IFINDEX_CHANGE :
		WRITE_ENTER(&ifs->ifs_ipf_mutex);
		if (hn->hne_data != NULL) {
			/*
			 * The netinfo passes interface index as int (hne_data should be
			 * handled as a pointer to int), which is always 32bit. We need to
			 * convert it to void pointer here, since interfaces are
			 * represented as pointers to void in IPF. The pointers are 64 bits
			 * long on 64bit platforms. Doing something like
			 *	(void *)((int) x)
			 * will throw warning:
			 *   "cast to pointer from integer of different size"
			 * during 64bit compilation.
			 *
			 * The line below uses (size_t) to typecast int to
			 * size_t, which might be 64bit/32bit (depending
			 * on architecture). Once we have proper 64bit/32bit
			 * type (size_t), we can safely convert it to void pointer.
			 */
			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
		break;
#endif

	default :
		break;
	}

	return 0;
}

/*
 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
 * are needed in Solaris kernel only. We don't need them in
 * ipftest to pretend the ICMP/RST packet was sent as a response.
 */
#if defined(_KERNEL) && (SOLARIS2 >= 10)
/* ------------------------------------------------------------------------ */
/* Function:    fr_make_rst                                                 */
/* Returns:     int - 0 on success, -1 on failure			    */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* We must alter the original mblks passed to IPF from IP stack via	    */
/* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
/* IPF can basicaly do only these things with mblk representing the packet: */
/*	leave it as it is (pass the packet)				    */
/*                                                                          */
/*	discard it (block the packet)					    */
/*                                                                          */
/*	alter it (i.e. NAT)						    */
/*                                                                          */
/* As you can see IPF can not simply discard the mblk and supply a new one  */
/* instead to IP stack via FW_HOOKS.					    */
/*                                                                          */
/* The return-rst action for packets coming via NIC is handled as follows:  */
/*	mblk with packet is discarded					    */
/*                                                                          */
/*	new mblk with RST response is constructed and injected to network   */
/*                                                                          */
/* IPF can't inject packets to loopback interface, this is just another	    */
/* limitation we have to deal with here. The only option to send RST	    */
/* response to offending TCP packet coming via loopback is to alter it.	    */
/*									    */
/* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
/* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
/* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
/* ------------------------------------------------------------------------ */
int fr_make_rst(fin)
fr_info_t *fin;
{
	uint16_t tmp_port;
	int rv = -1;
	uint32_t old_ack;
	tcphdr_t *tcp = NULL;
	struct in_addr tmp_src;
#ifdef USE_INET6
	struct in6_addr	tmp_src6;
#endif

	ASSERT(fin->fin_p == IPPROTO_TCP);

	/*
	 * We do not need to adjust chksum, since it is not being checked by
	 * Solaris IP stack for loopback clients.
	 */
	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {

		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
			/* Swap IPv4 addresses. */
			tmp_src = fin->fin_ip->ip_src;
			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
			fin->fin_ip->ip_dst = tmp_src;

			rv = 0;
		}
		else
			tcp = NULL;
	}
#ifdef USE_INET6
	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
		/*
		 * We are relying on fact the next header is TCP, which is true
		 * for regular TCP packets coming in over loopback.
		 */
		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
			/* Swap IPv6 addresses. */
			tmp_src6 = fin->fin_ip6->ip6_src;
			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
			fin->fin_ip6->ip6_dst = tmp_src6;

			rv = 0;
		}
		else
			tcp = NULL;
	}
#endif

	if (tcp != NULL) {
		/*
		 * Adjust TCP header:
		 *	swap ports,
		 *	set flags,
		 *	set correct ACK number
		 */
		tmp_port = tcp->th_sport;
		tcp->th_sport = tcp->th_dport;
		tcp->th_dport = tmp_port;
		old_ack = tcp->th_ack;
		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
		tcp->th_seq = old_ack;
		tcp->th_flags = TH_RST | TH_ACK;
	}

	return (rv);
}

/* ------------------------------------------------------------------------ */
/* Function:    fr_make_icmp_v4                                             */
/* Returns:     int - 0 on success, -1 on failure			    */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* Please read comment at fr_make_icmp() wrapper function to get an idea    */
/* what is going to happen here and why. Once you read the comment there,   */
/* continue here with next paragraph.					    */
/*									    */
/* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
/* happen here:								    */
/*	(1) Original mblk is copied (duplicated).			    */
/*                                                                          */
/*	(2) ICMP header is created.					    */
/*                                                                          */
/*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
/*	    data ready then.						    */
/*                                                                          */
/*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
/*                                                                          */
/*	(5) The mblk containing original packet is trimmed to contain IP    */
/*	    header only and ICMP chksum is computed.			    */
/*                                                                          */
/*	(6) The ICMP header we have from (3) is linked to original mblk,    */
/*	    which now contains new IP header. If original packet was spread */
/*	    over several mblks, only the first mblk is kept.		    */
/* ------------------------------------------------------------------------ */
static int fr_make_icmp_v4(fin)
fr_info_t *fin;
{
	struct in_addr tmp_src;
	tcphdr_t *tcp;
	struct icmp *icmp;
	mblk_t *mblk_icmp;
	mblk_t *mblk_ip;
	size_t icmp_pld_len;	/* octets to append to ICMP header */
	size_t orig_iphdr_len;	/* length of IP header only */
	uint32_t sum;
	uint16_t *buf;
	int len;


	if (fin->fin_v != 4)
		return (-1);

	/*
	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
	 */
	tcp = (tcphdr_t *) fin->fin_dp;

	if ((fin->fin_p == IPPROTO_TCP) &&
	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
		return (-1);

	/*
	 * Step (1)
	 *
	 * Make copy of original mblk.
	 *
	 * We want to copy as much data as necessary, not less, not more.  The
	 * ICMPv4 payload length for unreachable messages is:
	 *	original IP header + 8 bytes of L4 (if there are any).
	 *
	 * We determine if there are at least 8 bytes of L4 data following IP
	 * header first.
	 */
	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
		ICMPERR_ICMPHLEN : fin->fin_dlen;
	/*
	 * Since we don't want to copy more data than necessary, we must trim
	 * the original mblk here.  The right way (STREAMish) would be to use
	 * adjmsg() to trim it.  However we would have to calculate the length
	 * argument for adjmsg() from pointers we already have here.
	 *
	 * Since we have pointers and offsets, it's faster and easier for
	 * us to just adjust pointers by hand instead of using adjmsg().
	 */
	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
	fin->fin_m->b_wptr += icmp_pld_len;
	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;

	/*
	 * Also we don't want to copy any L2 stuff, which might precede IP
	 * header, so we have have to set b_rptr to point to the start of IP
	 * header.
	 */
	fin->fin_m->b_rptr += fin->fin_ipoff;
	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
		return (-1);
	fin->fin_m->b_rptr -= fin->fin_ipoff;

	/*
	 * Step (2)
	 *
	 * Create an ICMP header, which will be appened to original mblk later.
	 * ICMP header is just another mblk.
	 */
	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
	if (mblk_icmp == NULL) {
		FREE_MB_T(mblk_ip);
		return (-1);
	}

	MTYPE(mblk_icmp) = M_DATA;
	icmp = (struct icmp *) mblk_icmp->b_wptr;
	icmp->icmp_type = ICMP_UNREACH;
	icmp->icmp_code = fin->fin_icode & 0xFF;
	icmp->icmp_void = 0;
	icmp->icmp_cksum = 0;
	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;

	/*
	 * Step (3)
	 *
	 * Complete ICMP packet - link ICMP header with L4 data from original
	 * IP packet.
	 */
	linkb(mblk_icmp, mblk_ip);

	/*
	 * Step (4)
	 *
	 * Swap IP addresses and change IP header fields accordingly in
	 * original IP packet.
	 *
	 * There is a rule option return-icmp as a dest for physical
	 * interfaces. This option becomes useless for loopback, since IPF box
	 * uses same address as a loopback destination. We ignore the option
	 * here, the ICMP packet will always look like as it would have been
	 * sent from the original destination host.
	 */
	tmp_src = fin->fin_ip->ip_src;
	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
	fin->fin_ip->ip_dst = tmp_src;
	fin->fin_ip->ip_p = IPPROTO_ICMP;
	fin->fin_ip->ip_sum = 0;

	/*
	 * Step (5)
	 *
	 * We trim the orignal mblk to hold IP header only.
	 */
	fin->fin_m->b_wptr = fin->fin_dp;
	orig_iphdr_len = fin->fin_m->b_wptr -
			    (fin->fin_m->b_rptr + fin->fin_ipoff);
	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
			    orig_iphdr_len);

	/*
	 * ICMP chksum calculation. The data we are calculating chksum for are
	 * spread over two mblks, therefore we have to use two for loops.
	 *
	 * First for loop computes chksum part for ICMP header.
	 */
	buf = (uint16_t *) icmp;
	len = ICMPERR_ICMPHLEN;
	for (sum = 0; len > 1; len -= 2)
		sum += *buf++;

	/*
	 * Here we add chksum part for ICMP payload.
	 */
	len = icmp_pld_len;
	buf = (uint16_t *) mblk_ip->b_rptr;
	for (; len > 1; len -= 2)
		sum += *buf++;

	/*
	 * Chksum is done.
	 */
	sum = (sum >> 16) + (sum & 0xffff);
	sum += (sum >> 16);
	icmp->icmp_cksum = ~sum;

	/*
	 * Step (6)
	 *
	 * Release all packet mblks, except the first one.
	 */
	if (fin->fin_m->b_cont != NULL) {
		FREE_MB_T(fin->fin_m->b_cont);
	}

	/*
	 * Append ICMP payload to first mblk, which already contains new IP
	 * header.
	 */
	linkb(fin->fin_m, mblk_icmp);

	return (0);
}

#ifdef USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    fr_make_icmp_v6                                             */
/* Returns:     int - 0 on success, -1 on failure			    */
/* Parameters:  fin(I) - pointer to packet information                      */
/*									    */
/* Please read comment at fr_make_icmp() wrapper function to get an idea    */
/* what and why is going to happen here. Once you read the comment there,   */
/* continue here with next paragraph.					    */
/*									    */
/* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
/* The algorithm is fairly simple:					    */
/*	1) We need to get copy of complete mblk.			    */
/*									    */
/*	2) New ICMPv6 header is created.				    */
/*									    */
/*	3) The copy of original mblk with packet is linked to ICMPv6	    */
/*	   header.							    */
/*									    */
/*	4) The checksum must be adjusted.				    */
/*									    */
/*	5) IP addresses in original mblk are swapped and IP header data	    */
/*	   are adjusted (protocol number).				    */
/*									    */
/*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
/*	   linked with the ICMPv6 data we got from (3).			    */
/* ------------------------------------------------------------------------ */
static int fr_make_icmp_v6(fin)
fr_info_t *fin;
{
	struct icmp6_hdr *icmp6;
	tcphdr_t *tcp;
	struct in6_addr	tmp_src6;
	size_t icmp_pld_len;
	mblk_t *mblk_ip, *mblk_icmp;

	if (fin->fin_v != 6)
		return (-1);

	/*
	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
	 */
	tcp = (tcphdr_t *) fin->fin_dp;

	if ((fin->fin_p == IPPROTO_TCP) &&
	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
		return (-1);

	/*
	 * Step (1)
	 *
	 * We need to copy complete packet in case of IPv6, no trimming is
	 * needed (except the L2 headers).
	 */
	icmp_pld_len = M_LEN(fin->fin_m);
	fin->fin_m->b_rptr += fin->fin_ipoff;
	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
		return (-1);
	fin->fin_m->b_rptr -= fin->fin_ipoff;

	/*
	 * Step (2)
	 *
	 * Allocate and create ICMP header.
	 */
	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
			BPRI_HI);

	if (mblk_icmp == NULL)
		return (-1);

	MTYPE(mblk_icmp) = M_DATA;
	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
	icmp6->icmp6_type = ICMP6_DST_UNREACH;
	icmp6->icmp6_code = fin->fin_icode & 0xFF;
	icmp6->icmp6_data32[0] = 0;
	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);

	/*
	 * Step (3)
	 *
	 * Link the copy of IP packet to ICMP header.
	 */
	linkb(mblk_icmp, mblk_ip);

	/*
	 * Step (4)
	 *
	 * Calculate chksum - this is much more easier task than in case of
	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
	 * We are making compensation just for change of packet length.
	 */
	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);

	/*
	 * Step (5)
	 *
	 * Swap IP addresses.
	 */
	tmp_src6 = fin->fin_ip6->ip6_src;
	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
	fin->fin_ip6->ip6_dst = tmp_src6;

	/*
	 * and adjust IP header data.
	 */
	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));

	/*
	 * Step (6)
	 *
	 * We must release all linked mblks from original packet and keep only
	 * the first mblk with IP header to link ICMP data.
	 */
	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);

	if (fin->fin_m->b_cont != NULL) {
		FREE_MB_T(fin->fin_m->b_cont);
	}

	/*
	 * Append ICMP payload to IP header.
	 */
	linkb(fin->fin_m, mblk_icmp);

	return (0);
}
#endif	/* USE_INET6 */

/* ------------------------------------------------------------------------ */
/* Function:    fr_make_icmp                                                */
/* Returns:     int - 0 on success, -1 on failure			    */
/* Parameters:  fin(I) - pointer to packet information                      */
/*                                                                          */
/* We must alter the original mblks passed to IPF from IP stack via	    */
/* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
/* comment at fr_make_rst() function.					    */
/*									    */
/* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
/* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
/* protocol version. However there are some details, which are common to    */
/* both IP versions. The details are going to be explained here.	    */
/*                                                                          */
/* The packet looks as follows:						    */
/*    xxx | IP hdr | IP payload    ...	| 				    */
/*    ^   ^        ^            	^				    */
/*    |   |        |            	|				    */
/*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
/*    |   |        |							    */
/*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
/*    |   |								    */
/*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
/*    |      of loopback)						    */
/*    |   								    */
/*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
/*                                                                          */
/* All relevant IP headers are pulled up into the first mblk. It happened   */
/* well in advance before the matching rule was found (the rule, which took */
/* us here, to fr_make_icmp() function).				    */
/*                                                                          */
/* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
/* packet. New packet will be represented as chain of mblks.		    */
/* orig mblk |- b_cont ---.						    */
/*    ^                    `-> ICMP hdr |- b_cont--.			    */
/*    |	                          ^	            `-> duped orig mblk	    */
/*    |                           |				^	    */
/*    `- The original mblk        |				|	    */
/*       will be trimmed to       |				|	    */
/*       to contain IP header     |				|	    */
/*       only                     |				|	    */
/*                                |				|	    */
/*                                `- This is newly		|           */
/*                                   allocated mblk to		|	    */
/*                                   hold ICMPv6 data.		|	    */
/*								|	    */
/*								|	    */
/*								|	    */
/*	    This is the copy of original mblk, it will contain -'	    */
/*	    orignal IP  packet in case of ICMPv6. In case of		    */
/*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
/*	    (TCP/UDP/L4) data from original packet.			    */
/* ------------------------------------------------------------------------ */
int fr_make_icmp(fin)
fr_info_t *fin;
{
	int rv;

	if (fin->fin_v == 4)
		rv = fr_make_icmp_v4(fin);
#ifdef USE_INET6
	else if (fin->fin_v == 6)
		rv = fr_make_icmp_v6(fin);
#endif
	else
		rv = -1;

	return (rv);
}

/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum						    */
/* Returns:     unsigned int - sum of buffer buf			    */
/* Parameters:  buf - pointer to buf we want to sum up			    */
/*              len - length of buffer buf				    */
/*                                                                          */
/* Sums buffer buf. The result is used for chksum calculation. The buf	    */
/* argument must be aligned.						    */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	uint32_t	sum = 0;
	uint16_t	*b = (uint16_t *)buf;

	while (len > 1) {
		sum += *b++;
		len -= 2;
	}

	if (len == 1)
		sum += htons((*(unsigned char *)b) << 8);

	return (sum);
}

/* ------------------------------------------------------------------------ */
/* Function:    fr_calc_chksum						    */
/* Returns:     void							    */
/* Parameters:  fin - pointer to fr_info_t instance with packet data	    */
/*              pkt - pointer to duplicated packet			    */
/*                                                                          */
/* Calculates all chksums (L3, L4) for packet pkt. Works for both IP	    */
/* versions.								    */
/* ------------------------------------------------------------------------ */
void fr_calc_chksum(fin, pkt)
fr_info_t *fin;
mb_t *pkt;
{
	struct pseudo_hdr {
		union {
			struct in_addr	in4;
#ifdef USE_INET6
			struct in6_addr	in6;
#endif
		} src_addr;
		union {
			struct in_addr	in4;
#ifdef USE_INET6
			struct in6_addr	in6;
#endif
		} dst_addr;
		char		zero;
		char		proto;
		uint16_t	len;
	}	phdr;
	uint32_t	sum, ip_sum;
	void	*buf;
	uint16_t	*l4_csum_p;
	tcphdr_t	*tcp;
	udphdr_t	*udp;
	icmphdr_t	*icmp;
#ifdef USE_INET6
	struct icmp6_hdr	*icmp6;
#endif
	ip_t		*ip;
	unsigned int	len;
	int		pld_len;

	/*
	 * We need to pullup the packet to the single continuous buffer to avoid
	 * potential misaligment of b_rptr member in mblk chain.
	 */
	if (pullupmsg(pkt, -1) == 0) {
		cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
		    " will not be computed by IPF");
		return;
	}

	/*
	 * It is guaranteed IP header starts right at b_rptr, because we are
	 * working with a copy of the original packet.
	 *
	 * Compute pseudo header chksum for TCP and UDP.
	 */
	if ((fin->fin_p == IPPROTO_UDP) ||
	    (fin->fin_p == IPPROTO_TCP)) {
		bzero(&phdr, sizeof (phdr));
#ifdef USE_INET6
		if (fin->fin_v == 6) {
			phdr.src_addr.in6 = fin->fin_srcip6;
			phdr.dst_addr.in6 = fin->fin_dstip6;
		} else {
			phdr.src_addr.in4 = fin->fin_src;
			phdr.dst_addr.in4 = fin->fin_dst;
		}
#else
		phdr.src_addr.in4 = fin->fin_src;
		phdr.dst_addr.in4 = fin->fin_dst;
#endif
		phdr.zero = (char) 0;
		phdr.proto = fin->fin_p;
		phdr.len = htons((uint16_t)fin->fin_dlen);
		sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
	} else {
		sum = 0;
	}

	/*
	 * Set pointer to the L4 chksum field in the packet, set buf pointer to
	 * the L4 header start.
	 */
	switch (fin->fin_p) {
		case IPPROTO_UDP:
			udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
			l4_csum_p = &udp->uh_sum;
			buf = udp;
			break;
		case IPPROTO_TCP:
			tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
			l4_csum_p = &tcp->th_sum;
			buf = tcp;
			break;
		case IPPROTO_ICMP:
			icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
			l4_csum_p = &icmp->icmp_cksum;
			buf = icmp;
			break;
#ifdef USE_INET6
		case IPPROTO_ICMPV6:
			icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
			l4_csum_p = &icmp6->icmp6_cksum;
			buf = icmp6;
			break;
#endif
		default:
			l4_csum_p = NULL;
	}

	/*
	 * Compute L4 chksum if needed.
	 */
	if (l4_csum_p != NULL) {
		*l4_csum_p = (uint16_t)0;
		pld_len = fin->fin_dlen;
		len = pkt->b_wptr - (unsigned char *)buf;
		ASSERT(len == pld_len);
		/*
		 * Add payload sum to pseudoheader sum.
		 */
		sum += fr_buf_sum(buf, len);
		while (sum >> 16)
			sum = (sum & 0xFFFF) + (sum >> 16);

		*l4_csum_p = ~((uint16_t)sum);
		DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
	}

	/*
	 * The IP header chksum is needed just for IPv4.
	 */
	if (fin->fin_v == 4) {
		/*
		 * Compute IPv4 header chksum.
		 */
		ip = (ip_t *)pkt->b_rptr;
		ip->ip_sum = (uint16_t)0;
		ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
		while (ip_sum >> 16)
			ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);

		ip->ip_sum = ~((uint16_t)ip_sum);
		DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
	}

	return;
}

#endif	/* _KERNEL && SOLARIS2 >= 10 */