xref: /illumos-gate/usr/src/uts/common/inet/ilb/ilb.c (revision ab82c29b)
1dbed73cbSSangeeta Misra /*
2dbed73cbSSangeeta Misra  * CDDL HEADER START
3dbed73cbSSangeeta Misra  *
4dbed73cbSSangeeta Misra  * The contents of this file are subject to the terms of the
5dbed73cbSSangeeta Misra  * Common Development and Distribution License (the "License").
6dbed73cbSSangeeta Misra  * You may not use this file except in compliance with the License.
7dbed73cbSSangeeta Misra  *
8dbed73cbSSangeeta Misra  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9dbed73cbSSangeeta Misra  * or http://www.opensolaris.org/os/licensing.
10dbed73cbSSangeeta Misra  * See the License for the specific language governing permissions
11dbed73cbSSangeeta Misra  * and limitations under the License.
12dbed73cbSSangeeta Misra  *
13dbed73cbSSangeeta Misra  * When distributing Covered Code, include this CDDL HEADER in each
14dbed73cbSSangeeta Misra  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15dbed73cbSSangeeta Misra  * If applicable, add the following below this CDDL HEADER, with the
16dbed73cbSSangeeta Misra  * fields enclosed by brackets "[]" replaced with your own identifying
17dbed73cbSSangeeta Misra  * information: Portions Copyright [yyyy] [name of copyright owner]
18dbed73cbSSangeeta Misra  *
19dbed73cbSSangeeta Misra  * CDDL HEADER END
20dbed73cbSSangeeta Misra  */
21dbed73cbSSangeeta Misra 
22dbed73cbSSangeeta Misra /*
23dbed73cbSSangeeta Misra  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24dbed73cbSSangeeta Misra  * Use is subject to license terms.
25dbed73cbSSangeeta Misra  */
26dbed73cbSSangeeta Misra 
27de710d24SJosef 'Jeff' Sipek #include <sys/sysmacros.h>
28dbed73cbSSangeeta Misra #include <sys/kmem.h>
29dbed73cbSSangeeta Misra #include <sys/ksynch.h>
30dbed73cbSSangeeta Misra #include <sys/systm.h>
31dbed73cbSSangeeta Misra #include <sys/socket.h>
32dbed73cbSSangeeta Misra #include <sys/disp.h>
33dbed73cbSSangeeta Misra #include <sys/taskq.h>
34dbed73cbSSangeeta Misra #include <sys/cmn_err.h>
35dbed73cbSSangeeta Misra #include <sys/strsun.h>
36dbed73cbSSangeeta Misra #include <sys/sdt.h>
37dbed73cbSSangeeta Misra #include <sys/atomic.h>
38dbed73cbSSangeeta Misra #include <netinet/in.h>
39dbed73cbSSangeeta Misra #include <inet/ip.h>
40dbed73cbSSangeeta Misra #include <inet/ip6.h>
41dbed73cbSSangeeta Misra #include <inet/tcp.h>
42dbed73cbSSangeeta Misra #include <inet/udp_impl.h>
43dbed73cbSSangeeta Misra #include <inet/kstatcom.h>
44dbed73cbSSangeeta Misra 
45dbed73cbSSangeeta Misra #include <inet/ilb_ip.h>
46dbed73cbSSangeeta Misra #include "ilb_alg.h"
47dbed73cbSSangeeta Misra #include "ilb_nat.h"
48dbed73cbSSangeeta Misra #include "ilb_conn.h"
49dbed73cbSSangeeta Misra 
/* ILB kmem cache flag. */
int ilb_kmem_flags = 0;

/*
 * Default sizes of the ILB hash tables.  These values are global, i.e.
 * every stack allocates its own tables but all stacks use the same sizes.
 */
static size_t ilb_rule_hash_size = 2048;
static size_t ilb_conn_hash_size = 262144;
static size_t ilb_sticky_hash_size = 262144;

/* Size of the NAT source hash table; this should be a prime number. */
static size_t ilb_nat_src_hash_size = 97;

/* Default expiry time of a NAT cache entry, per transport protocol. */
static uint32_t ilb_conn_tcp_expiry = 120;
static uint32_t ilb_conn_udp_expiry = 60;

/* Default expiry time of a sticky entry. */
static uint32_t ilb_sticky_expiry = 60;
72dbed73cbSSangeeta Misra 
/*
 * Hash the four bytes at addr (assumed to be a uint8_t * pointing at an
 * ipaddr_t) into a bucket index.  The bytes are combined as a base-31
 * polynomial, with byte 3 carrying the highest power, and the result is
 * masked with (hash_size - 1); the mask only selects a valid bucket when
 * hash_size is a power of 2.
 */
#define	ILB_RULE_HASH(addr, hash_size)					\
	(((((*((addr) + 3)) * 31 + (*((addr) + 2))) * 31 +		\
	(*((addr) + 1))) * 31 + (*(addr))) & ((hash_size) - 1))
77dbed73cbSSangeeta Misra 
78dbed73cbSSangeeta Misra /*
/*
 * Note on ILB delayed processing
 *
 * To avoid inline removal of some of the data structures, such as rules,
 * servers and ilb_conn_hash entries, ILB delays such processing to a taskq.
 * There are three types of ILB taskq:
 *
 * 1. rule handling: created at stack initialization time, ilb_stack_init()
 * 2. conn hash handling: created at conn hash initialization time,
 *                        ilb_conn_hash_init()
 * 3. sticky hash handling: created at sticky hash initialization time,
 *                          ilb_sticky_hash_init()
 *
 * The rule taskq is for processing rule and server removal.  When a
 * userland rule/server removal request comes in, a task is dispatched to
 * this taskq after the rule/server has been removed from all related
 * hashes.  The task waits until all references to the rule/server are gone
 * before removing it, so the userland thread requesting the removal does
 * not need to wait for the removal to complete.
 *
 * The conn hash/sticky hash taskq is for processing ilb_conn_hash and
 * ilb_sticky_hash table entry removal.  There are ilb_conn_timer_size timers
 * and ilb_sticky_timer_size timers running for ilb_conn_hash and
 * ilb_sticky_hash cleanup respectively.  Each timer is responsible for one
 * portion (same size) of the hash table.  When a timer fires, it dispatches
 * a task to the corresponding taskq to clean up its portion of the table.
 * This avoids inline processing of the removal.
 *
 * There is one more kind of delayed processing: the cleanup of the NAT
 * source address table.  It is handled directly by a timer instead of a
 * taskq, because the table is small enough for that to be acceptable.
 */
111dbed73cbSSangeeta Misra 
112dbed73cbSSangeeta Misra /* ILB rule taskq constants. */
113dbed73cbSSangeeta Misra #define	ILB_RULE_TASKQ_NUM_THR	20
114dbed73cbSSangeeta Misra 
115dbed73cbSSangeeta Misra /* Argument passed to ILB rule taskq routines. */
116dbed73cbSSangeeta Misra typedef	struct {
117dbed73cbSSangeeta Misra 	ilb_stack_t	*ilbs;
118dbed73cbSSangeeta Misra 	ilb_rule_t	*rule;
119dbed73cbSSangeeta Misra } ilb_rule_tq_t;
120dbed73cbSSangeeta Misra 
121dbed73cbSSangeeta Misra /* kstat handling routines. */
122dbed73cbSSangeeta Misra static kstat_t *ilb_kstat_g_init(netstackid_t, ilb_stack_t *);
123dbed73cbSSangeeta Misra static void ilb_kstat_g_fini(netstackid_t, ilb_stack_t *);
124dbed73cbSSangeeta Misra static kstat_t *ilb_rule_kstat_init(netstackid_t, ilb_rule_t *);
125dbed73cbSSangeeta Misra static kstat_t *ilb_server_kstat_init(netstackid_t, ilb_rule_t *,
126dbed73cbSSangeeta Misra     ilb_server_t *);
127dbed73cbSSangeeta Misra 
128dbed73cbSSangeeta Misra /* Rule hash handling routines. */
129dbed73cbSSangeeta Misra static void ilb_rule_hash_init(ilb_stack_t *);
130dbed73cbSSangeeta Misra static void ilb_rule_hash_fini(ilb_stack_t *);
131dbed73cbSSangeeta Misra static void ilb_rule_hash_add(ilb_stack_t *, ilb_rule_t *, const in6_addr_t *);
132dbed73cbSSangeeta Misra static void ilb_rule_hash_del(ilb_rule_t *);
133dbed73cbSSangeeta Misra static ilb_rule_t *ilb_rule_hash(ilb_stack_t *, int, int, in6_addr_t *,
134dbed73cbSSangeeta Misra     in_port_t, zoneid_t, uint32_t, boolean_t *);
135dbed73cbSSangeeta Misra 
136dbed73cbSSangeeta Misra static void ilb_rule_g_add(ilb_stack_t *, ilb_rule_t *);
137dbed73cbSSangeeta Misra static void ilb_rule_g_del(ilb_stack_t *, ilb_rule_t *);
138dbed73cbSSangeeta Misra static void ilb_del_rule_common(ilb_stack_t *, ilb_rule_t *);
139dbed73cbSSangeeta Misra static ilb_rule_t *ilb_find_rule_locked(ilb_stack_t *, zoneid_t, const char *,
140dbed73cbSSangeeta Misra     int *);
141dbed73cbSSangeeta Misra static boolean_t ilb_match_rule(ilb_stack_t *, zoneid_t, const char *, int,
142dbed73cbSSangeeta Misra     int, in_port_t, in_port_t, const in6_addr_t *);
143dbed73cbSSangeeta Misra 
144dbed73cbSSangeeta Misra /* Back end server handling routines. */
145dbed73cbSSangeeta Misra static void ilb_server_free(ilb_server_t *);
146dbed73cbSSangeeta Misra 
147dbed73cbSSangeeta Misra /* Network stack handling routines. */
148dbed73cbSSangeeta Misra static void *ilb_stack_init(netstackid_t, netstack_t *);
149dbed73cbSSangeeta Misra static void ilb_stack_shutdown(netstackid_t, void *);
150dbed73cbSSangeeta Misra static void ilb_stack_fini(netstackid_t, void *);
151dbed73cbSSangeeta Misra 
152dbed73cbSSangeeta Misra /* Sticky connection handling routines. */
153dbed73cbSSangeeta Misra static void ilb_rule_sticky_init(ilb_rule_t *);
154dbed73cbSSangeeta Misra static void ilb_rule_sticky_fini(ilb_rule_t *);
155dbed73cbSSangeeta Misra 
156dbed73cbSSangeeta Misra /* Handy macro to check for unspecified address. */
157dbed73cbSSangeeta Misra #define	IS_ADDR_UNSPEC(addr)						\
158dbed73cbSSangeeta Misra 	(IN6_IS_ADDR_V4MAPPED(addr) ? IN6_IS_ADDR_V4MAPPED_ANY(addr) :	\
159dbed73cbSSangeeta Misra 	    IN6_IS_ADDR_UNSPECIFIED(addr))
160dbed73cbSSangeeta Misra 
161dbed73cbSSangeeta Misra /*
162dbed73cbSSangeeta Misra  * Global kstat instance counter.  When a rule is created, its kstat instance
163dbed73cbSSangeeta Misra  * number is assigned by ilb_kstat_instance and ilb_kstat_instance is
164dbed73cbSSangeeta Misra  * incremented.
165dbed73cbSSangeeta Misra  */
166dbed73cbSSangeeta Misra static uint_t ilb_kstat_instance = 0;
167dbed73cbSSangeeta Misra 
168dbed73cbSSangeeta Misra /*
169dbed73cbSSangeeta Misra  * The ILB global kstat has name ILB_G_KS_NAME and class name ILB_G_KS_CNAME.
170dbed73cbSSangeeta Misra  * A rule's kstat has ILB_RULE_KS_CNAME class name.
171dbed73cbSSangeeta Misra  */
172dbed73cbSSangeeta Misra #define	ILB_G_KS_NAME		"global"
173dbed73cbSSangeeta Misra #define	ILB_G_KS_CNAME		"kstat"
174dbed73cbSSangeeta Misra #define	ILB_RULE_KS_CNAME	"rulestat"
175dbed73cbSSangeeta Misra 
176dbed73cbSSangeeta Misra static kstat_t *
ilb_kstat_g_init(netstackid_t stackid,ilb_stack_t * ilbs)177dbed73cbSSangeeta Misra ilb_kstat_g_init(netstackid_t stackid, ilb_stack_t *ilbs)
178dbed73cbSSangeeta Misra {
179dbed73cbSSangeeta Misra 	kstat_t *ksp;
180dbed73cbSSangeeta Misra 	ilb_g_kstat_t template = {
181dbed73cbSSangeeta Misra 		{ "num_rules",		KSTAT_DATA_UINT64, 0 },
182dbed73cbSSangeeta Misra 		{ "ip_frag_in",		KSTAT_DATA_UINT64, 0 },
183dbed73cbSSangeeta Misra 		{ "ip_frag_dropped",	KSTAT_DATA_UINT64, 0 }
184dbed73cbSSangeeta Misra 	};
185dbed73cbSSangeeta Misra 
186dbed73cbSSangeeta Misra 	ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, 0, ILB_G_KS_NAME,
187dbed73cbSSangeeta Misra 	    ILB_G_KS_CNAME, KSTAT_TYPE_NAMED, NUM_OF_FIELDS(ilb_g_kstat_t),
188dbed73cbSSangeeta Misra 	    KSTAT_FLAG_VIRTUAL, stackid);
189dbed73cbSSangeeta Misra 	if (ksp == NULL)
190dbed73cbSSangeeta Misra 		return (NULL);
191dbed73cbSSangeeta Misra 	bcopy(&template, ilbs->ilbs_kstat, sizeof (template));
192dbed73cbSSangeeta Misra 	ksp->ks_data = ilbs->ilbs_kstat;
193dbed73cbSSangeeta Misra 	ksp->ks_private = (void *)(uintptr_t)stackid;
194dbed73cbSSangeeta Misra 
195dbed73cbSSangeeta Misra 	kstat_install(ksp);
196dbed73cbSSangeeta Misra 	return (ksp);
197dbed73cbSSangeeta Misra }
198dbed73cbSSangeeta Misra 
199dbed73cbSSangeeta Misra static void
ilb_kstat_g_fini(netstackid_t stackid,ilb_stack_t * ilbs)200dbed73cbSSangeeta Misra ilb_kstat_g_fini(netstackid_t stackid, ilb_stack_t *ilbs)
201dbed73cbSSangeeta Misra {
202dbed73cbSSangeeta Misra 	if (ilbs->ilbs_ksp != NULL) {
203dbed73cbSSangeeta Misra 		ASSERT(stackid == (netstackid_t)(uintptr_t)
204dbed73cbSSangeeta Misra 		    ilbs->ilbs_ksp->ks_private);
205dbed73cbSSangeeta Misra 		kstat_delete_netstack(ilbs->ilbs_ksp, stackid);
206dbed73cbSSangeeta Misra 		ilbs->ilbs_ksp = NULL;
207dbed73cbSSangeeta Misra 	}
208dbed73cbSSangeeta Misra }
209dbed73cbSSangeeta Misra 
210dbed73cbSSangeeta Misra static kstat_t *
ilb_rule_kstat_init(netstackid_t stackid,ilb_rule_t * rule)211dbed73cbSSangeeta Misra ilb_rule_kstat_init(netstackid_t stackid, ilb_rule_t *rule)
212dbed73cbSSangeeta Misra {
213dbed73cbSSangeeta Misra 	kstat_t *ksp;
214dbed73cbSSangeeta Misra 	ilb_rule_kstat_t template = {
215dbed73cbSSangeeta Misra 		{ "num_servers",		KSTAT_DATA_UINT64, 0 },
216dbed73cbSSangeeta Misra 		{ "bytes_not_processed",	KSTAT_DATA_UINT64, 0 },
217dbed73cbSSangeeta Misra 		{ "pkt_not_processed",		KSTAT_DATA_UINT64, 0 },
218dbed73cbSSangeeta Misra 		{ "bytes_dropped",		KSTAT_DATA_UINT64, 0 },
219dbed73cbSSangeeta Misra 		{ "pkt_dropped",		KSTAT_DATA_UINT64, 0 },
220dbed73cbSSangeeta Misra 		{ "nomem_bytes_dropped",	KSTAT_DATA_UINT64, 0 },
221dbed73cbSSangeeta Misra 		{ "nomem_pkt_dropped",		KSTAT_DATA_UINT64, 0 },
222dbed73cbSSangeeta Misra 		{ "noport_bytes_dropped",	KSTAT_DATA_UINT64, 0 },
223dbed73cbSSangeeta Misra 		{ "noport_pkt_dropped",		KSTAT_DATA_UINT64, 0 },
224dbed73cbSSangeeta Misra 		{ "icmp_echo_processed",	KSTAT_DATA_UINT64, 0 },
225dbed73cbSSangeeta Misra 		{ "icmp_dropped",		KSTAT_DATA_UINT64, 0 },
226dbed73cbSSangeeta Misra 		{ "icmp_too_big_processed",	KSTAT_DATA_UINT64, 0 },
227dbed73cbSSangeeta Misra 		{ "icmp_too_big_dropped",	KSTAT_DATA_UINT64, 0 }
228dbed73cbSSangeeta Misra 	};
229dbed73cbSSangeeta Misra 
230dbed73cbSSangeeta Misra 	ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance,
231dbed73cbSSangeeta Misra 	    rule->ir_name, ILB_RULE_KS_CNAME, KSTAT_TYPE_NAMED,
232dbed73cbSSangeeta Misra 	    NUM_OF_FIELDS(ilb_rule_kstat_t), KSTAT_FLAG_VIRTUAL, stackid);
233dbed73cbSSangeeta Misra 	if (ksp == NULL)
234dbed73cbSSangeeta Misra 		return (NULL);
235dbed73cbSSangeeta Misra 
236dbed73cbSSangeeta Misra 	bcopy(&template, &rule->ir_kstat, sizeof (template));
237dbed73cbSSangeeta Misra 	ksp->ks_data = &rule->ir_kstat;
238dbed73cbSSangeeta Misra 	ksp->ks_private = (void *)(uintptr_t)stackid;
239dbed73cbSSangeeta Misra 
240dbed73cbSSangeeta Misra 	kstat_install(ksp);
241dbed73cbSSangeeta Misra 	return (ksp);
242dbed73cbSSangeeta Misra }
243dbed73cbSSangeeta Misra 
244dbed73cbSSangeeta Misra static kstat_t *
ilb_server_kstat_init(netstackid_t stackid,ilb_rule_t * rule,ilb_server_t * server)245dbed73cbSSangeeta Misra ilb_server_kstat_init(netstackid_t stackid, ilb_rule_t *rule,
246dbed73cbSSangeeta Misra     ilb_server_t *server)
247dbed73cbSSangeeta Misra {
248dbed73cbSSangeeta Misra 	kstat_t *ksp;
249dbed73cbSSangeeta Misra 	ilb_server_kstat_t template = {
250dbed73cbSSangeeta Misra 		{ "bytes_processed",	KSTAT_DATA_UINT64, 0 },
251dbed73cbSSangeeta Misra 		{ "pkt_processed",	KSTAT_DATA_UINT64, 0 },
252dbed73cbSSangeeta Misra 		{ "ip_address",		KSTAT_DATA_STRING, 0 }
253dbed73cbSSangeeta Misra 	};
254dbed73cbSSangeeta Misra 	char cname_buf[KSTAT_STRLEN];
255dbed73cbSSangeeta Misra 
256dbed73cbSSangeeta Misra 	/* 7 is "-sstat" */
257dbed73cbSSangeeta Misra 	ASSERT(strlen(rule->ir_name) + 7 < KSTAT_STRLEN);
258dbed73cbSSangeeta Misra 	(void) sprintf(cname_buf, "%s-sstat", rule->ir_name);
259dbed73cbSSangeeta Misra 	ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance,
260dbed73cbSSangeeta Misra 	    server->iser_name, cname_buf, KSTAT_TYPE_NAMED,
261dbed73cbSSangeeta Misra 	    NUM_OF_FIELDS(ilb_server_kstat_t), KSTAT_FLAG_VIRTUAL, stackid);
262dbed73cbSSangeeta Misra 	if (ksp == NULL)
263dbed73cbSSangeeta Misra 		return (NULL);
264dbed73cbSSangeeta Misra 
265dbed73cbSSangeeta Misra 	bcopy(&template, &server->iser_kstat, sizeof (template));
266dbed73cbSSangeeta Misra 	ksp->ks_data = &server->iser_kstat;
267dbed73cbSSangeeta Misra 	ksp->ks_private = (void *)(uintptr_t)stackid;
268dbed73cbSSangeeta Misra 
269dbed73cbSSangeeta Misra 	kstat_named_setstr(&server->iser_kstat.ip_address,
270dbed73cbSSangeeta Misra 	    server->iser_ip_addr);
271dbed73cbSSangeeta Misra 	/* We never change the IP address */
272dbed73cbSSangeeta Misra 	ksp->ks_data_size += strlen(server->iser_ip_addr) + 1;
273dbed73cbSSangeeta Misra 
274dbed73cbSSangeeta Misra 	kstat_install(ksp);
275dbed73cbSSangeeta Misra 	return (ksp);
276dbed73cbSSangeeta Misra }
277dbed73cbSSangeeta Misra 
278dbed73cbSSangeeta Misra /* Initialize the rule hash table. */
279dbed73cbSSangeeta Misra static void
ilb_rule_hash_init(ilb_stack_t * ilbs)280dbed73cbSSangeeta Misra ilb_rule_hash_init(ilb_stack_t *ilbs)
281dbed73cbSSangeeta Misra {
282dbed73cbSSangeeta Misra 	int i;
283dbed73cbSSangeeta Misra 
284dbed73cbSSangeeta Misra 	/*
285dbed73cbSSangeeta Misra 	 * If ilbs->ilbs_rule_hash_size is not a power of 2, bump it up to
286dbed73cbSSangeeta Misra 	 * the next power of 2.
287dbed73cbSSangeeta Misra 	 */
288de710d24SJosef 'Jeff' Sipek 	if (!ISP2(ilbs->ilbs_rule_hash_size)) {
289dbed73cbSSangeeta Misra 		for (i = 0; i < 31; i++) {
290dbed73cbSSangeeta Misra 			if (ilbs->ilbs_rule_hash_size < (1 << i))
291dbed73cbSSangeeta Misra 				break;
292dbed73cbSSangeeta Misra 		}
293dbed73cbSSangeeta Misra 		ilbs->ilbs_rule_hash_size = 1 << i;
294dbed73cbSSangeeta Misra 	}
295dbed73cbSSangeeta Misra 	ilbs->ilbs_g_hash = kmem_zalloc(sizeof (ilb_hash_t) *
296dbed73cbSSangeeta Misra 	    ilbs->ilbs_rule_hash_size, KM_SLEEP);
297dbed73cbSSangeeta Misra 	for (i = 0; i < ilbs->ilbs_rule_hash_size; i++) {
298dbed73cbSSangeeta Misra 		mutex_init(&ilbs->ilbs_g_hash[i].ilb_hash_lock, NULL,
299dbed73cbSSangeeta Misra 		    MUTEX_DEFAULT, NULL);
300dbed73cbSSangeeta Misra 	}
301dbed73cbSSangeeta Misra }
302dbed73cbSSangeeta Misra 
303dbed73cbSSangeeta Misra /* Clean up the rule hash table. */
304dbed73cbSSangeeta Misra static void
ilb_rule_hash_fini(ilb_stack_t * ilbs)305dbed73cbSSangeeta Misra ilb_rule_hash_fini(ilb_stack_t *ilbs)
306dbed73cbSSangeeta Misra {
307dbed73cbSSangeeta Misra 	if (ilbs->ilbs_g_hash == NULL)
308dbed73cbSSangeeta Misra 		return;
309dbed73cbSSangeeta Misra 	kmem_free(ilbs->ilbs_g_hash, sizeof (ilb_hash_t) *
310dbed73cbSSangeeta Misra 	    ilbs->ilbs_rule_hash_size);
311dbed73cbSSangeeta Misra }
312dbed73cbSSangeeta Misra 
313dbed73cbSSangeeta Misra /* Add a rule to the rule hash table. */
314dbed73cbSSangeeta Misra static void
ilb_rule_hash_add(ilb_stack_t * ilbs,ilb_rule_t * rule,const in6_addr_t * addr)315dbed73cbSSangeeta Misra ilb_rule_hash_add(ilb_stack_t *ilbs, ilb_rule_t *rule, const in6_addr_t *addr)
316dbed73cbSSangeeta Misra {
317dbed73cbSSangeeta Misra 	int i;
318dbed73cbSSangeeta Misra 
319dbed73cbSSangeeta Misra 	i = ILB_RULE_HASH((uint8_t *)&addr->s6_addr32[3],
320dbed73cbSSangeeta Misra 	    ilbs->ilbs_rule_hash_size);
321dbed73cbSSangeeta Misra 	DTRACE_PROBE2(ilb__rule__hash__add, ilb_rule_t *, rule, int, i);
322dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
323dbed73cbSSangeeta Misra 	rule->ir_hash_next = ilbs->ilbs_g_hash[i].ilb_hash_rule;
324dbed73cbSSangeeta Misra 	if (ilbs->ilbs_g_hash[i].ilb_hash_rule != NULL)
325dbed73cbSSangeeta Misra 		ilbs->ilbs_g_hash[i].ilb_hash_rule->ir_hash_prev = rule;
326dbed73cbSSangeeta Misra 	rule->ir_hash_prev = NULL;
327dbed73cbSSangeeta Misra 	ilbs->ilbs_g_hash[i].ilb_hash_rule = rule;
328dbed73cbSSangeeta Misra 
329dbed73cbSSangeeta Misra 	rule->ir_hash = &ilbs->ilbs_g_hash[i];
330dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
331dbed73cbSSangeeta Misra }
332dbed73cbSSangeeta Misra 
333dbed73cbSSangeeta Misra /*
334dbed73cbSSangeeta Misra  * Remove a rule from the rule hash table.  Note that the rule is not freed
335dbed73cbSSangeeta Misra  * in this routine.
336dbed73cbSSangeeta Misra  */
337dbed73cbSSangeeta Misra static void
ilb_rule_hash_del(ilb_rule_t * rule)338dbed73cbSSangeeta Misra ilb_rule_hash_del(ilb_rule_t *rule)
339dbed73cbSSangeeta Misra {
340dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_hash->ilb_hash_lock);
341dbed73cbSSangeeta Misra 	if (rule->ir_hash->ilb_hash_rule == rule) {
342dbed73cbSSangeeta Misra 		rule->ir_hash->ilb_hash_rule = rule->ir_hash_next;
343dbed73cbSSangeeta Misra 		if (rule->ir_hash_next != NULL)
344dbed73cbSSangeeta Misra 			rule->ir_hash_next->ir_hash_prev = NULL;
345dbed73cbSSangeeta Misra 	} else {
346dbed73cbSSangeeta Misra 		if (rule->ir_hash_prev != NULL)
347dbed73cbSSangeeta Misra 			rule->ir_hash_prev->ir_hash_next =
348dbed73cbSSangeeta Misra 			    rule->ir_hash_next;
349dbed73cbSSangeeta Misra 		if (rule->ir_hash_next != NULL) {
350dbed73cbSSangeeta Misra 			rule->ir_hash_next->ir_hash_prev =
351dbed73cbSSangeeta Misra 			    rule->ir_hash_prev;
352dbed73cbSSangeeta Misra 		}
353dbed73cbSSangeeta Misra 	}
354dbed73cbSSangeeta Misra 	mutex_exit(&rule->ir_hash->ilb_hash_lock);
355dbed73cbSSangeeta Misra 
356dbed73cbSSangeeta Misra 	rule->ir_hash_next = NULL;
357dbed73cbSSangeeta Misra 	rule->ir_hash_prev = NULL;
358dbed73cbSSangeeta Misra 	rule->ir_hash = NULL;
359dbed73cbSSangeeta Misra }
360dbed73cbSSangeeta Misra 
361dbed73cbSSangeeta Misra /*
362dbed73cbSSangeeta Misra  * Given the info of a packet, look for a match in the rule hash table.
363dbed73cbSSangeeta Misra  */
364dbed73cbSSangeeta Misra static ilb_rule_t *
ilb_rule_hash(ilb_stack_t * ilbs,int l3,int l4,in6_addr_t * addr,in_port_t port,zoneid_t zoneid,uint32_t len,boolean_t * busy)365dbed73cbSSangeeta Misra ilb_rule_hash(ilb_stack_t *ilbs, int l3, int l4, in6_addr_t *addr,
366dbed73cbSSangeeta Misra     in_port_t port, zoneid_t zoneid, uint32_t len, boolean_t *busy)
367dbed73cbSSangeeta Misra {
368dbed73cbSSangeeta Misra 	int i;
369dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
370dbed73cbSSangeeta Misra 	ipaddr_t v4_addr;
371dbed73cbSSangeeta Misra 
372dbed73cbSSangeeta Misra 	*busy = B_FALSE;
373dbed73cbSSangeeta Misra 	IN6_V4MAPPED_TO_IPADDR(addr, v4_addr);
374dbed73cbSSangeeta Misra 	i = ILB_RULE_HASH((uint8_t *)&v4_addr, ilbs->ilbs_rule_hash_size);
375dbed73cbSSangeeta Misra 	port = ntohs(port);
376dbed73cbSSangeeta Misra 
377dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
378dbed73cbSSangeeta Misra 	for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL;
379dbed73cbSSangeeta Misra 	    rule = rule->ir_hash_next) {
380dbed73cbSSangeeta Misra 		if (!rule->ir_port_range) {
381dbed73cbSSangeeta Misra 			if (rule->ir_min_port != port)
382dbed73cbSSangeeta Misra 				continue;
383dbed73cbSSangeeta Misra 		} else {
384dbed73cbSSangeeta Misra 			if (port < rule->ir_min_port ||
385dbed73cbSSangeeta Misra 			    port > rule->ir_max_port) {
386dbed73cbSSangeeta Misra 				continue;
387dbed73cbSSangeeta Misra 			}
388dbed73cbSSangeeta Misra 		}
389dbed73cbSSangeeta Misra 		if (rule->ir_ipver != l3 || rule->ir_proto != l4 ||
390dbed73cbSSangeeta Misra 		    rule->ir_zoneid != zoneid) {
391dbed73cbSSangeeta Misra 			continue;
392dbed73cbSSangeeta Misra 		}
393dbed73cbSSangeeta Misra 
394dbed73cbSSangeeta Misra 		if (l3 == IPPROTO_IP) {
395dbed73cbSSangeeta Misra 			if (rule->ir_target_v4 != INADDR_ANY &&
396dbed73cbSSangeeta Misra 			    rule->ir_target_v4 != v4_addr) {
397dbed73cbSSangeeta Misra 				continue;
398dbed73cbSSangeeta Misra 			}
399dbed73cbSSangeeta Misra 		} else {
400dbed73cbSSangeeta Misra 			if (!IN6_IS_ADDR_UNSPECIFIED(&rule->ir_target_v6) &&
401dbed73cbSSangeeta Misra 			    !IN6_ARE_ADDR_EQUAL(addr, &rule->ir_target_v6)) {
402dbed73cbSSangeeta Misra 				continue;
403dbed73cbSSangeeta Misra 			}
404dbed73cbSSangeeta Misra 		}
405dbed73cbSSangeeta Misra 
406dbed73cbSSangeeta Misra 		/*
407dbed73cbSSangeeta Misra 		 * Just update the stats if the rule is disabled.
408dbed73cbSSangeeta Misra 		 */
409dbed73cbSSangeeta Misra 		mutex_enter(&rule->ir_lock);
410dbed73cbSSangeeta Misra 		if (!(rule->ir_flags & ILB_RULE_ENABLED)) {
411dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, pkt_not_processed);
412dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, bytes_not_processed, len);
413dbed73cbSSangeeta Misra 			mutex_exit(&rule->ir_lock);
414dbed73cbSSangeeta Misra 			rule = NULL;
415dbed73cbSSangeeta Misra 			break;
416dbed73cbSSangeeta Misra 		} else if (rule->ir_flags & ILB_RULE_BUSY) {
417dbed73cbSSangeeta Misra 			/*
418dbed73cbSSangeeta Misra 			 * If we are busy...
419dbed73cbSSangeeta Misra 			 *
420dbed73cbSSangeeta Misra 			 * XXX we should have a queue to postpone the
421dbed73cbSSangeeta Misra 			 * packet processing.  But this requires a
422dbed73cbSSangeeta Misra 			 * mechanism in IP to re-start the packet
423dbed73cbSSangeeta Misra 			 * processing.  So for now, just drop the packet.
424dbed73cbSSangeeta Misra 			 */
425dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, pkt_dropped);
426dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, bytes_dropped, len);
427dbed73cbSSangeeta Misra 			mutex_exit(&rule->ir_lock);
428dbed73cbSSangeeta Misra 			*busy = B_TRUE;
429dbed73cbSSangeeta Misra 			rule = NULL;
430dbed73cbSSangeeta Misra 			break;
431dbed73cbSSangeeta Misra 		} else {
432dbed73cbSSangeeta Misra 			rule->ir_refcnt++;
433dbed73cbSSangeeta Misra 			ASSERT(rule->ir_refcnt != 1);
434dbed73cbSSangeeta Misra 			mutex_exit(&rule->ir_lock);
435dbed73cbSSangeeta Misra 			break;
436dbed73cbSSangeeta Misra 		}
437dbed73cbSSangeeta Misra 	}
438dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
439dbed73cbSSangeeta Misra 	return (rule);
440dbed73cbSSangeeta Misra }
441dbed73cbSSangeeta Misra 
442dbed73cbSSangeeta Misra /*
443dbed73cbSSangeeta Misra  * Add a rule to the global rule list.  This list is for finding all rules
444dbed73cbSSangeeta Misra  * in an IP stack.  The caller is assumed to hold the ilbs_g_lock.
445dbed73cbSSangeeta Misra  */
446dbed73cbSSangeeta Misra static void
ilb_rule_g_add(ilb_stack_t * ilbs,ilb_rule_t * rule)447dbed73cbSSangeeta Misra ilb_rule_g_add(ilb_stack_t *ilbs, ilb_rule_t *rule)
448dbed73cbSSangeeta Misra {
449dbed73cbSSangeeta Misra 	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
450dbed73cbSSangeeta Misra 	rule->ir_next = ilbs->ilbs_rule_head;
451dbed73cbSSangeeta Misra 	ilbs->ilbs_rule_head = rule;
452dbed73cbSSangeeta Misra 	ILB_KSTAT_UPDATE(ilbs, num_rules, 1);
453dbed73cbSSangeeta Misra }
454dbed73cbSSangeeta Misra 
455dbed73cbSSangeeta Misra /* The call is assumed to hold the ilbs_g_lock. */
456dbed73cbSSangeeta Misra static void
ilb_rule_g_del(ilb_stack_t * ilbs,ilb_rule_t * rule)457dbed73cbSSangeeta Misra ilb_rule_g_del(ilb_stack_t *ilbs, ilb_rule_t *rule)
458dbed73cbSSangeeta Misra {
459dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
460dbed73cbSSangeeta Misra 	ilb_rule_t *prev_rule;
461dbed73cbSSangeeta Misra 
462dbed73cbSSangeeta Misra 	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
463dbed73cbSSangeeta Misra 	prev_rule = NULL;
464dbed73cbSSangeeta Misra 	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
465dbed73cbSSangeeta Misra 	    prev_rule = tmp_rule, tmp_rule = tmp_rule->ir_next) {
466dbed73cbSSangeeta Misra 		if (tmp_rule == rule)
467dbed73cbSSangeeta Misra 			break;
468dbed73cbSSangeeta Misra 	}
469dbed73cbSSangeeta Misra 	if (tmp_rule == NULL) {
470dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_g_lock);
471dbed73cbSSangeeta Misra 		return;
472dbed73cbSSangeeta Misra 	}
473dbed73cbSSangeeta Misra 	if (prev_rule == NULL)
474dbed73cbSSangeeta Misra 		ilbs->ilbs_rule_head = tmp_rule->ir_next;
475dbed73cbSSangeeta Misra 	else
476dbed73cbSSangeeta Misra 		prev_rule->ir_next = tmp_rule->ir_next;
477dbed73cbSSangeeta Misra 	ILB_KSTAT_UPDATE(ilbs, num_rules, -1);
478dbed73cbSSangeeta Misra }
479dbed73cbSSangeeta Misra 
/*
 * Helper routine to calculate how many source addresses are in the range
 * [a1, a2], both ends included.
 *
 * The max number of NAT sources is assumed not to be large, so the most
 * significant 2 s6_addr32 words of a1 and a2 must be equal.  The least
 * significant 2 words are converted to host byte order and handled as one
 * 64-bit number, so that neither the ordering check nor the subtraction
 * depends on the byte order of the machine.
 *
 * Returns the number of addresses, or -1 if the upper words differ, if a1
 * is greater than a2, or if the number does not fit in an int64_t.
 */
static int64_t
num_nat_src_v6(const in6_addr_t *a1, const in6_addr_t *a2)
{
	uint64_t start, end, span;

	if (a1->s6_addr32[0] != a2->s6_addr32[0] ||
	    a1->s6_addr32[1] != a2->s6_addr32[1]) {
		return (-1);
	}

	start = ((uint64_t)ntohl(a1->s6_addr32[2]) << 32) |
	    (uint64_t)ntohl(a1->s6_addr32[3]);
	end = ((uint64_t)ntohl(a2->s6_addr32[2]) << 32) |
	    (uint64_t)ntohl(a2->s6_addr32[3]);
	if (start > end)
		return (-1);

	/*
	 * The result is span + 1, computed in 64 bits so that a range
	 * covering a whole 32-bit word does not wrap to 0.  Refuse ranges
	 * for which span + 1 would exceed the largest int64_t value.
	 */
	span = end - start;
	if (span >= (((uint64_t)1 << 63) - 1))
		return (-1);
	return ((int64_t)(span + 1));
}
512dbed73cbSSangeeta Misra 
513dbed73cbSSangeeta Misra /*
514dbed73cbSSangeeta Misra  * Add an ILB rule.
515dbed73cbSSangeeta Misra  */
516dbed73cbSSangeeta Misra int
ilb_rule_add(ilb_stack_t * ilbs,zoneid_t zoneid,const ilb_rule_cmd_t * cmd)517dbed73cbSSangeeta Misra ilb_rule_add(ilb_stack_t *ilbs, zoneid_t zoneid, const ilb_rule_cmd_t *cmd)
518dbed73cbSSangeeta Misra {
519dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
520dbed73cbSSangeeta Misra 	netstackid_t stackid;
521dbed73cbSSangeeta Misra 	int ret;
522dbed73cbSSangeeta Misra 	in_port_t min_port, max_port;
523dbed73cbSSangeeta Misra 	int64_t num_src;
524dbed73cbSSangeeta Misra 
525dbed73cbSSangeeta Misra 	/* Sanity checks. */
526dbed73cbSSangeeta Misra 	if (cmd->ip_ver != IPPROTO_IP && cmd->ip_ver != IPPROTO_IPV6)
527dbed73cbSSangeeta Misra 		return (EINVAL);
528dbed73cbSSangeeta Misra 
529dbed73cbSSangeeta Misra 	/* Need to support SCTP... */
530dbed73cbSSangeeta Misra 	if (cmd->proto != IPPROTO_TCP && cmd->proto != IPPROTO_UDP)
531dbed73cbSSangeeta Misra 		return (EINVAL);
532dbed73cbSSangeeta Misra 
533dbed73cbSSangeeta Misra 	/* For full NAT, the NAT source must be supplied. */
534dbed73cbSSangeeta Misra 	if (cmd->topo == ILB_TOPO_IMPL_NAT) {
535dbed73cbSSangeeta Misra 		if (IS_ADDR_UNSPEC(&cmd->nat_src_start) ||
536dbed73cbSSangeeta Misra 		    IS_ADDR_UNSPEC(&cmd->nat_src_end)) {
537dbed73cbSSangeeta Misra 			return (EINVAL);
538dbed73cbSSangeeta Misra 		}
539dbed73cbSSangeeta Misra 	}
540dbed73cbSSangeeta Misra 
541dbed73cbSSangeeta Misra 	/* Check invalid mask */
542dbed73cbSSangeeta Misra 	if ((cmd->flags & ILB_RULE_STICKY) &&
543dbed73cbSSangeeta Misra 	    IS_ADDR_UNSPEC(&cmd->sticky_mask)) {
544dbed73cbSSangeeta Misra 		return (EINVAL);
545dbed73cbSSangeeta Misra 	}
546dbed73cbSSangeeta Misra 
547dbed73cbSSangeeta Misra 	/* Port is passed in network byte order. */
548dbed73cbSSangeeta Misra 	min_port = ntohs(cmd->min_port);
549dbed73cbSSangeeta Misra 	max_port = ntohs(cmd->max_port);
550dbed73cbSSangeeta Misra 	if (min_port > max_port)
551dbed73cbSSangeeta Misra 		return (EINVAL);
552dbed73cbSSangeeta Misra 
553dbed73cbSSangeeta Misra 	/* min_port == 0 means "all ports". Make it so */
554dbed73cbSSangeeta Misra 	if (min_port == 0) {
555dbed73cbSSangeeta Misra 		min_port = 1;
556dbed73cbSSangeeta Misra 		max_port = 65535;
557dbed73cbSSangeeta Misra 	}
558dbed73cbSSangeeta Misra 
559dbed73cbSSangeeta Misra 	/* Funny address checking. */
560dbed73cbSSangeeta Misra 	if (cmd->ip_ver == IPPROTO_IP) {
561dbed73cbSSangeeta Misra 		in_addr_t v4_addr1, v4_addr2;
562dbed73cbSSangeeta Misra 
563dbed73cbSSangeeta Misra 		v4_addr1 = cmd->vip.s6_addr32[3];
564dbed73cbSSangeeta Misra 		if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET ||
565dbed73cbSSangeeta Misra 		    CLASSD(v4_addr1) || v4_addr1 == INADDR_BROADCAST ||
566dbed73cbSSangeeta Misra 		    v4_addr1 == INADDR_ANY ||
567dbed73cbSSangeeta Misra 		    !IN6_IS_ADDR_V4MAPPED(&cmd->vip)) {
568dbed73cbSSangeeta Misra 			return (EINVAL);
569dbed73cbSSangeeta Misra 		}
570dbed73cbSSangeeta Misra 
571dbed73cbSSangeeta Misra 		if (cmd->topo == ILB_TOPO_IMPL_NAT) {
572dbed73cbSSangeeta Misra 			v4_addr1 = ntohl(cmd->nat_src_start.s6_addr32[3]);
573dbed73cbSSangeeta Misra 			v4_addr2 = ntohl(cmd->nat_src_end.s6_addr32[3]);
574dbed73cbSSangeeta Misra 			if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET ||
575dbed73cbSSangeeta Misra 			    (*(uchar_t *)&v4_addr2) == IN_LOOPBACKNET ||
576dbed73cbSSangeeta Misra 			    v4_addr1 == INADDR_BROADCAST ||
577dbed73cbSSangeeta Misra 			    v4_addr2 == INADDR_BROADCAST ||
578dbed73cbSSangeeta Misra 			    v4_addr1 == INADDR_ANY || v4_addr2 == INADDR_ANY ||
579dbed73cbSSangeeta Misra 			    CLASSD(v4_addr1) || CLASSD(v4_addr2) ||
580dbed73cbSSangeeta Misra 			    !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) ||
581dbed73cbSSangeeta Misra 			    !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) {
582dbed73cbSSangeeta Misra 				return (EINVAL);
583dbed73cbSSangeeta Misra 			}
584dbed73cbSSangeeta Misra 
585dbed73cbSSangeeta Misra 			num_src = v4_addr2 - v4_addr1 + 1;
586dbed73cbSSangeeta Misra 			if (v4_addr1 > v4_addr2 || num_src > ILB_MAX_NAT_SRC)
587dbed73cbSSangeeta Misra 				return (EINVAL);
588dbed73cbSSangeeta Misra 		}
589dbed73cbSSangeeta Misra 	} else {
590dbed73cbSSangeeta Misra 		if (IN6_IS_ADDR_LOOPBACK(&cmd->vip) ||
591dbed73cbSSangeeta Misra 		    IN6_IS_ADDR_MULTICAST(&cmd->vip) ||
592dbed73cbSSangeeta Misra 		    IN6_IS_ADDR_UNSPECIFIED(&cmd->vip) ||
593dbed73cbSSangeeta Misra 		    IN6_IS_ADDR_V4MAPPED(&cmd->vip)) {
594dbed73cbSSangeeta Misra 			return (EINVAL);
595dbed73cbSSangeeta Misra 		}
596dbed73cbSSangeeta Misra 
597dbed73cbSSangeeta Misra 		if (cmd->topo == ILB_TOPO_IMPL_NAT) {
598dbed73cbSSangeeta Misra 			if (IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_start) ||
599dbed73cbSSangeeta Misra 			    IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_end) ||
600dbed73cbSSangeeta Misra 			    IN6_IS_ADDR_MULTICAST(&cmd->nat_src_start) ||
601dbed73cbSSangeeta Misra 			    IN6_IS_ADDR_MULTICAST(&cmd->nat_src_end) ||
602dbed73cbSSangeeta Misra 			    IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_start) ||
603dbed73cbSSangeeta Misra 			    IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_end) ||
604dbed73cbSSangeeta Misra 			    IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) ||
605dbed73cbSSangeeta Misra 			    IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) {
606dbed73cbSSangeeta Misra 				return (EINVAL);
607dbed73cbSSangeeta Misra 			}
608dbed73cbSSangeeta Misra 
609dbed73cbSSangeeta Misra 			if ((num_src = num_nat_src_v6(&cmd->nat_src_start,
610dbed73cbSSangeeta Misra 			    &cmd->nat_src_end)) < 0 ||
611dbed73cbSSangeeta Misra 			    num_src > ILB_MAX_NAT_SRC) {
612dbed73cbSSangeeta Misra 				return (EINVAL);
613dbed73cbSSangeeta Misra 			}
614dbed73cbSSangeeta Misra 		}
615dbed73cbSSangeeta Misra 	}
616dbed73cbSSangeeta Misra 
617dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
618dbed73cbSSangeeta Misra 	if (ilbs->ilbs_g_hash == NULL)
619dbed73cbSSangeeta Misra 		ilb_rule_hash_init(ilbs);
620dbed73cbSSangeeta Misra 	if (ilbs->ilbs_c2s_conn_hash == NULL) {
621dbed73cbSSangeeta Misra 		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
622dbed73cbSSangeeta Misra 		ilb_conn_hash_init(ilbs);
623dbed73cbSSangeeta Misra 		ilb_nat_src_init(ilbs);
624dbed73cbSSangeeta Misra 	}
625dbed73cbSSangeeta Misra 
626dbed73cbSSangeeta Misra 	/* Make sure that the new rule does not duplicate an existing one. */
627dbed73cbSSangeeta Misra 	if (ilb_match_rule(ilbs, zoneid, cmd->name, cmd->ip_ver, cmd->proto,
628dbed73cbSSangeeta Misra 	    min_port, max_port, &cmd->vip)) {
629dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_g_lock);
630dbed73cbSSangeeta Misra 		return (EEXIST);
631dbed73cbSSangeeta Misra 	}
632dbed73cbSSangeeta Misra 
633dbed73cbSSangeeta Misra 	rule = kmem_zalloc(sizeof (ilb_rule_t), KM_NOSLEEP);
634dbed73cbSSangeeta Misra 	if (rule == NULL) {
635dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_g_lock);
636dbed73cbSSangeeta Misra 		return (ENOMEM);
637dbed73cbSSangeeta Misra 	}
638dbed73cbSSangeeta Misra 
639dbed73cbSSangeeta Misra 	/* ir_name is all 0 to begin with */
640dbed73cbSSangeeta Misra 	(void) memcpy(rule->ir_name, cmd->name, ILB_RULE_NAMESZ - 1);
641dbed73cbSSangeeta Misra 
6421a5e258fSJosef 'Jeff' Sipek 	rule->ir_ks_instance = atomic_inc_uint_nv(&ilb_kstat_instance);
643dbed73cbSSangeeta Misra 	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
644dbed73cbSSangeeta Misra 	if ((rule->ir_ksp = ilb_rule_kstat_init(stackid, rule)) == NULL) {
645dbed73cbSSangeeta Misra 		ret = ENOMEM;
646dbed73cbSSangeeta Misra 		goto error;
647dbed73cbSSangeeta Misra 	}
648dbed73cbSSangeeta Misra 
649dbed73cbSSangeeta Misra 	if (cmd->topo == ILB_TOPO_IMPL_NAT) {
650dbed73cbSSangeeta Misra 		rule->ir_nat_src_start = cmd->nat_src_start;
651dbed73cbSSangeeta Misra 		rule->ir_nat_src_end = cmd->nat_src_end;
652dbed73cbSSangeeta Misra 	}
653dbed73cbSSangeeta Misra 
654dbed73cbSSangeeta Misra 	rule->ir_ipver = cmd->ip_ver;
655dbed73cbSSangeeta Misra 	rule->ir_proto = cmd->proto;
656dbed73cbSSangeeta Misra 	rule->ir_topo = cmd->topo;
657dbed73cbSSangeeta Misra 
658dbed73cbSSangeeta Misra 	rule->ir_min_port = min_port;
659dbed73cbSSangeeta Misra 	rule->ir_max_port = max_port;
660dbed73cbSSangeeta Misra 	if (rule->ir_min_port != rule->ir_max_port)
661dbed73cbSSangeeta Misra 		rule->ir_port_range = B_TRUE;
662dbed73cbSSangeeta Misra 	else
663dbed73cbSSangeeta Misra 		rule->ir_port_range = B_FALSE;
664dbed73cbSSangeeta Misra 
665dbed73cbSSangeeta Misra 	rule->ir_zoneid = zoneid;
666dbed73cbSSangeeta Misra 
667dbed73cbSSangeeta Misra 	rule->ir_target_v6 = cmd->vip;
668dbed73cbSSangeeta Misra 	rule->ir_servers = NULL;
669dbed73cbSSangeeta Misra 
670dbed73cbSSangeeta Misra 	/*
671dbed73cbSSangeeta Misra 	 * The default connection drain timeout is indefinite (value 0),
672dbed73cbSSangeeta Misra 	 * meaning we will wait for all connections to finish.  So we
673dbed73cbSSangeeta Misra 	 * can assign cmd->conn_drain_timeout to it directly.
674dbed73cbSSangeeta Misra 	 */
675dbed73cbSSangeeta Misra 	rule->ir_conn_drain_timeout = cmd->conn_drain_timeout;
676dbed73cbSSangeeta Misra 	if (cmd->nat_expiry != 0) {
677dbed73cbSSangeeta Misra 		rule->ir_nat_expiry = cmd->nat_expiry;
678dbed73cbSSangeeta Misra 	} else {
679dbed73cbSSangeeta Misra 		switch (rule->ir_proto) {
680dbed73cbSSangeeta Misra 		case IPPROTO_TCP:
681dbed73cbSSangeeta Misra 			rule->ir_nat_expiry = ilb_conn_tcp_expiry;
682dbed73cbSSangeeta Misra 			break;
683dbed73cbSSangeeta Misra 		case IPPROTO_UDP:
684dbed73cbSSangeeta Misra 			rule->ir_nat_expiry = ilb_conn_udp_expiry;
685dbed73cbSSangeeta Misra 			break;
686dbed73cbSSangeeta Misra 		default:
687dbed73cbSSangeeta Misra 			cmn_err(CE_PANIC, "data corruption: wrong ir_proto: %p",
688dbed73cbSSangeeta Misra 			    (void *)rule);
689dbed73cbSSangeeta Misra 			break;
690dbed73cbSSangeeta Misra 		}
691dbed73cbSSangeeta Misra 	}
692dbed73cbSSangeeta Misra 	if (cmd->sticky_expiry != 0)
693dbed73cbSSangeeta Misra 		rule->ir_sticky_expiry = cmd->sticky_expiry;
694dbed73cbSSangeeta Misra 	else
695dbed73cbSSangeeta Misra 		rule->ir_sticky_expiry = ilb_sticky_expiry;
696dbed73cbSSangeeta Misra 
697dbed73cbSSangeeta Misra 	if (cmd->flags & ILB_RULE_STICKY) {
698dbed73cbSSangeeta Misra 		rule->ir_flags |= ILB_RULE_STICKY;
699dbed73cbSSangeeta Misra 		rule->ir_sticky_mask = cmd->sticky_mask;
700dbed73cbSSangeeta Misra 		if (ilbs->ilbs_sticky_hash == NULL)
701dbed73cbSSangeeta Misra 			ilb_sticky_hash_init(ilbs);
702dbed73cbSSangeeta Misra 	}
703dbed73cbSSangeeta Misra 	if (cmd->flags & ILB_RULE_ENABLED)
704dbed73cbSSangeeta Misra 		rule->ir_flags |= ILB_RULE_ENABLED;
705dbed73cbSSangeeta Misra 
706dbed73cbSSangeeta Misra 	mutex_init(&rule->ir_lock, NULL, MUTEX_DEFAULT, NULL);
707dbed73cbSSangeeta Misra 	cv_init(&rule->ir_cv, NULL, CV_DEFAULT, NULL);
708dbed73cbSSangeeta Misra 
709dbed73cbSSangeeta Misra 	rule->ir_refcnt = 1;
710dbed73cbSSangeeta Misra 
711dbed73cbSSangeeta Misra 	switch (cmd->algo) {
712dbed73cbSSangeeta Misra 	case ILB_ALG_IMPL_ROUNDROBIN:
713dbed73cbSSangeeta Misra 		if ((rule->ir_alg = ilb_alg_rr_init(rule, NULL)) == NULL) {
714dbed73cbSSangeeta Misra 			ret = ENOMEM;
715dbed73cbSSangeeta Misra 			goto error;
716dbed73cbSSangeeta Misra 		}
717dbed73cbSSangeeta Misra 		rule->ir_alg_type = ILB_ALG_IMPL_ROUNDROBIN;
718dbed73cbSSangeeta Misra 		break;
719dbed73cbSSangeeta Misra 	case ILB_ALG_IMPL_HASH_IP:
720dbed73cbSSangeeta Misra 	case ILB_ALG_IMPL_HASH_IP_SPORT:
721dbed73cbSSangeeta Misra 	case ILB_ALG_IMPL_HASH_IP_VIP:
722dbed73cbSSangeeta Misra 		if ((rule->ir_alg = ilb_alg_hash_init(rule,
723dbed73cbSSangeeta Misra 		    &cmd->algo)) == NULL) {
724dbed73cbSSangeeta Misra 			ret = ENOMEM;
725dbed73cbSSangeeta Misra 			goto error;
726dbed73cbSSangeeta Misra 		}
727dbed73cbSSangeeta Misra 		rule->ir_alg_type = cmd->algo;
728dbed73cbSSangeeta Misra 		break;
729dbed73cbSSangeeta Misra 	default:
730dbed73cbSSangeeta Misra 		ret = EINVAL;
731dbed73cbSSangeeta Misra 		goto error;
732dbed73cbSSangeeta Misra 	}
733dbed73cbSSangeeta Misra 
734dbed73cbSSangeeta Misra 	/* Add it to the global list and hash array at the end. */
735dbed73cbSSangeeta Misra 	ilb_rule_g_add(ilbs, rule);
736dbed73cbSSangeeta Misra 	ilb_rule_hash_add(ilbs, rule, &cmd->vip);
737dbed73cbSSangeeta Misra 
738dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
739dbed73cbSSangeeta Misra 
740dbed73cbSSangeeta Misra 	return (0);
741dbed73cbSSangeeta Misra 
742dbed73cbSSangeeta Misra error:
743dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
744dbed73cbSSangeeta Misra 	if (rule->ir_ksp != NULL) {
745dbed73cbSSangeeta Misra 		/* stackid must be initialized if ir_ksp != NULL */
746dbed73cbSSangeeta Misra 		kstat_delete_netstack(rule->ir_ksp, stackid);
747dbed73cbSSangeeta Misra 	}
748dbed73cbSSangeeta Misra 	kmem_free(rule, sizeof (ilb_rule_t));
749dbed73cbSSangeeta Misra 	return (ret);
750dbed73cbSSangeeta Misra }
751dbed73cbSSangeeta Misra 
752dbed73cbSSangeeta Misra /*
753dbed73cbSSangeeta Misra  * The final part in deleting a rule.  Either called directly or by the
754dbed73cbSSangeeta Misra  * taskq dispatched.
755dbed73cbSSangeeta Misra  */
756dbed73cbSSangeeta Misra static void
ilb_rule_del_common(ilb_stack_t * ilbs,ilb_rule_t * tmp_rule)757dbed73cbSSangeeta Misra ilb_rule_del_common(ilb_stack_t *ilbs, ilb_rule_t *tmp_rule)
758dbed73cbSSangeeta Misra {
759dbed73cbSSangeeta Misra 	netstackid_t stackid;
760dbed73cbSSangeeta Misra 	ilb_server_t *server;
761dbed73cbSSangeeta Misra 
762dbed73cbSSangeeta Misra 	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
763dbed73cbSSangeeta Misra 
764dbed73cbSSangeeta Misra 	/*
765dbed73cbSSangeeta Misra 	 * Let the algorithm know that the rule is going away.  The
766dbed73cbSSangeeta Misra 	 * algorithm fini routine will free all its resources with this
767dbed73cbSSangeeta Misra 	 * rule.
768dbed73cbSSangeeta Misra 	 */
769dbed73cbSSangeeta Misra 	tmp_rule->ir_alg->ilb_alg_fini(&tmp_rule->ir_alg);
770dbed73cbSSangeeta Misra 
771dbed73cbSSangeeta Misra 	while ((server = tmp_rule->ir_servers) != NULL) {
772dbed73cbSSangeeta Misra 		mutex_enter(&server->iser_lock);
773dbed73cbSSangeeta Misra 		ilb_destroy_nat_src(&server->iser_nat_src);
774dbed73cbSSangeeta Misra 		if (tmp_rule->ir_conn_drain_timeout != 0) {
775dbed73cbSSangeeta Misra 			/*
776dbed73cbSSangeeta Misra 			 * The garbage collection thread checks this value
777dbed73cbSSangeeta Misra 			 * without grabing a lock.  So we need to use
778dbed73cbSSangeeta Misra 			 * atomic_swap_64() to make sure that the value seen
779dbed73cbSSangeeta Misra 			 * by gc thread is intact.
780dbed73cbSSangeeta Misra 			 */
781dbed73cbSSangeeta Misra 			(void) atomic_swap_64(
782d3d50737SRafael Vanoni 			    (uint64_t *)&server->iser_die_time,
783d3d50737SRafael Vanoni 			    ddi_get_lbolt64() +
784dbed73cbSSangeeta Misra 			    SEC_TO_TICK(tmp_rule->ir_conn_drain_timeout));
785dbed73cbSSangeeta Misra 		}
786dbed73cbSSangeeta Misra 		while (server->iser_refcnt > 1)
787dbed73cbSSangeeta Misra 			cv_wait(&server->iser_cv, &server->iser_lock);
788dbed73cbSSangeeta Misra 		tmp_rule->ir_servers = server->iser_next;
789dbed73cbSSangeeta Misra 		kstat_delete_netstack(server->iser_ksp, stackid);
790dbed73cbSSangeeta Misra 		kmem_free(server, sizeof (ilb_server_t));
791dbed73cbSSangeeta Misra 	}
792dbed73cbSSangeeta Misra 
793dbed73cbSSangeeta Misra 	ASSERT(tmp_rule->ir_ksp != NULL);
794dbed73cbSSangeeta Misra 	kstat_delete_netstack(tmp_rule->ir_ksp, stackid);
795dbed73cbSSangeeta Misra 
796dbed73cbSSangeeta Misra 	kmem_free(tmp_rule, sizeof (ilb_rule_t));
797dbed73cbSSangeeta Misra }
798dbed73cbSSangeeta Misra 
799dbed73cbSSangeeta Misra /* The routine executed by the delayed rule taskq. */
800dbed73cbSSangeeta Misra static void
ilb_rule_del_tq(void * arg)801dbed73cbSSangeeta Misra ilb_rule_del_tq(void *arg)
802dbed73cbSSangeeta Misra {
803dbed73cbSSangeeta Misra 	ilb_stack_t *ilbs = ((ilb_rule_tq_t *)arg)->ilbs;
804dbed73cbSSangeeta Misra 	ilb_rule_t *rule = ((ilb_rule_tq_t *)arg)->rule;
805dbed73cbSSangeeta Misra 
806dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_lock);
807dbed73cbSSangeeta Misra 	while (rule->ir_refcnt > 1)
808dbed73cbSSangeeta Misra 		cv_wait(&rule->ir_cv, &rule->ir_lock);
809dbed73cbSSangeeta Misra 	ilb_rule_del_common(ilbs, rule);
810dbed73cbSSangeeta Misra 	kmem_free(arg, sizeof (ilb_rule_tq_t));
811dbed73cbSSangeeta Misra }
812dbed73cbSSangeeta Misra 
813dbed73cbSSangeeta Misra /* Routine to delete a rule. */
814dbed73cbSSangeeta Misra int
ilb_rule_del(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name)815dbed73cbSSangeeta Misra ilb_rule_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name)
816dbed73cbSSangeeta Misra {
817dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
818dbed73cbSSangeeta Misra 	ilb_rule_tq_t *arg;
819dbed73cbSSangeeta Misra 	int err;
820dbed73cbSSangeeta Misra 
821dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
822dbed73cbSSangeeta Misra 	if ((tmp_rule = ilb_find_rule_locked(ilbs, zoneid, name,
823dbed73cbSSangeeta Misra 	    &err)) == NULL) {
824dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_g_lock);
825dbed73cbSSangeeta Misra 		return (err);
826dbed73cbSSangeeta Misra 	}
827dbed73cbSSangeeta Misra 
828dbed73cbSSangeeta Misra 	/*
829dbed73cbSSangeeta Misra 	 * First remove the rule from the hash array and the global list so
830dbed73cbSSangeeta Misra 	 * that no one can find this rule any more.
831dbed73cbSSangeeta Misra 	 */
832dbed73cbSSangeeta Misra 	ilb_rule_hash_del(tmp_rule);
833dbed73cbSSangeeta Misra 	ilb_rule_g_del(ilbs, tmp_rule);
834dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
835dbed73cbSSangeeta Misra 	ILB_RULE_REFRELE(tmp_rule);
836dbed73cbSSangeeta Misra 
837dbed73cbSSangeeta Misra 	/*
838dbed73cbSSangeeta Misra 	 * Now no one can find this rule, we can remove it once all
839dbed73cbSSangeeta Misra 	 * references to it are dropped and all references to the list
840dbed73cbSSangeeta Misra 	 * of servers are dropped.  So dispatch a task to finish the deletion.
841dbed73cbSSangeeta Misra 	 * We do this instead of letting the last one referencing the
842dbed73cbSSangeeta Misra 	 * rule do it.  The reason is that the last one may be the
843dbed73cbSSangeeta Misra 	 * interrupt thread.  We want to minimize the work it needs to
844dbed73cbSSangeeta Misra 	 * do.  Rule deletion is not a critical task so it can be delayed.
845dbed73cbSSangeeta Misra 	 */
846dbed73cbSSangeeta Misra 	arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP);
847dbed73cbSSangeeta Misra 	arg->ilbs = ilbs;
848dbed73cbSSangeeta Misra 	arg->rule = tmp_rule;
849dbed73cbSSangeeta Misra 	(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq, arg,
850dbed73cbSSangeeta Misra 	    TQ_SLEEP);
851dbed73cbSSangeeta Misra 
852dbed73cbSSangeeta Misra 	return (0);
853dbed73cbSSangeeta Misra }
854dbed73cbSSangeeta Misra 
855dbed73cbSSangeeta Misra /*
856dbed73cbSSangeeta Misra  * Given an IP address, check to see if there is a rule using this
857dbed73cbSSangeeta Misra  * as the VIP.  It can be used to check if we need to drop a fragment.
858dbed73cbSSangeeta Misra  */
859dbed73cbSSangeeta Misra boolean_t
ilb_rule_match_vip_v6(ilb_stack_t * ilbs,in6_addr_t * vip,ilb_rule_t ** ret_rule)860dbed73cbSSangeeta Misra ilb_rule_match_vip_v6(ilb_stack_t *ilbs, in6_addr_t *vip, ilb_rule_t **ret_rule)
861dbed73cbSSangeeta Misra {
862dbed73cbSSangeeta Misra 	int i;
863dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
864dbed73cbSSangeeta Misra 	boolean_t ret = B_FALSE;
865dbed73cbSSangeeta Misra 
866dbed73cbSSangeeta Misra 	i = ILB_RULE_HASH((uint8_t *)&vip->s6_addr32[3],
867dbed73cbSSangeeta Misra 	    ilbs->ilbs_rule_hash_size);
868dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
869dbed73cbSSangeeta Misra 	for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL;
870dbed73cbSSangeeta Misra 	    rule = rule->ir_hash_next) {
871dbed73cbSSangeeta Misra 		if (IN6_ARE_ADDR_EQUAL(vip, &rule->ir_target_v6)) {
872dbed73cbSSangeeta Misra 			mutex_enter(&rule->ir_lock);
873dbed73cbSSangeeta Misra 			if (rule->ir_flags & ILB_RULE_BUSY) {
874dbed73cbSSangeeta Misra 				mutex_exit(&rule->ir_lock);
875dbed73cbSSangeeta Misra 				break;
876dbed73cbSSangeeta Misra 			}
877dbed73cbSSangeeta Misra 			if (ret_rule != NULL) {
878dbed73cbSSangeeta Misra 				rule->ir_refcnt++;
879dbed73cbSSangeeta Misra 				mutex_exit(&rule->ir_lock);
880dbed73cbSSangeeta Misra 				*ret_rule = rule;
881dbed73cbSSangeeta Misra 			} else {
882dbed73cbSSangeeta Misra 				mutex_exit(&rule->ir_lock);
883dbed73cbSSangeeta Misra 			}
884dbed73cbSSangeeta Misra 			ret = B_TRUE;
885dbed73cbSSangeeta Misra 			break;
886dbed73cbSSangeeta Misra 		}
887dbed73cbSSangeeta Misra 	}
888dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
889dbed73cbSSangeeta Misra 	return (ret);
890dbed73cbSSangeeta Misra }
891dbed73cbSSangeeta Misra 
892dbed73cbSSangeeta Misra boolean_t
ilb_rule_match_vip_v4(ilb_stack_t * ilbs,ipaddr_t addr,ilb_rule_t ** ret_rule)893dbed73cbSSangeeta Misra ilb_rule_match_vip_v4(ilb_stack_t *ilbs, ipaddr_t addr, ilb_rule_t **ret_rule)
894dbed73cbSSangeeta Misra {
895dbed73cbSSangeeta Misra 	int i;
896dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
897dbed73cbSSangeeta Misra 	boolean_t ret = B_FALSE;
898dbed73cbSSangeeta Misra 
899dbed73cbSSangeeta Misra 	i = ILB_RULE_HASH((uint8_t *)&addr, ilbs->ilbs_rule_hash_size);
900dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
901dbed73cbSSangeeta Misra 	for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL;
902dbed73cbSSangeeta Misra 	    rule = rule->ir_hash_next) {
903dbed73cbSSangeeta Misra 		if (rule->ir_target_v6.s6_addr32[3] == addr) {
904dbed73cbSSangeeta Misra 			mutex_enter(&rule->ir_lock);
905dbed73cbSSangeeta Misra 			if (rule->ir_flags & ILB_RULE_BUSY) {
906dbed73cbSSangeeta Misra 				mutex_exit(&rule->ir_lock);
907dbed73cbSSangeeta Misra 				break;
908dbed73cbSSangeeta Misra 			}
909dbed73cbSSangeeta Misra 			if (ret_rule != NULL) {
910dbed73cbSSangeeta Misra 				rule->ir_refcnt++;
911dbed73cbSSangeeta Misra 				mutex_exit(&rule->ir_lock);
912dbed73cbSSangeeta Misra 				*ret_rule = rule;
913dbed73cbSSangeeta Misra 			} else {
914dbed73cbSSangeeta Misra 				mutex_exit(&rule->ir_lock);
915dbed73cbSSangeeta Misra 			}
916dbed73cbSSangeeta Misra 			ret = B_TRUE;
917dbed73cbSSangeeta Misra 			break;
918dbed73cbSSangeeta Misra 		}
919dbed73cbSSangeeta Misra 	}
920dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock);
921dbed73cbSSangeeta Misra 	return (ret);
922dbed73cbSSangeeta Misra }
923dbed73cbSSangeeta Misra 
924dbed73cbSSangeeta Misra static ilb_rule_t *
ilb_find_rule_locked(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name,int * err)925dbed73cbSSangeeta Misra ilb_find_rule_locked(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
926dbed73cbSSangeeta Misra     int *err)
927dbed73cbSSangeeta Misra {
928dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
929dbed73cbSSangeeta Misra 
930dbed73cbSSangeeta Misra 	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
931dbed73cbSSangeeta Misra 
932dbed73cbSSangeeta Misra 	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
933dbed73cbSSangeeta Misra 	    tmp_rule = tmp_rule->ir_next) {
934dbed73cbSSangeeta Misra 		if (tmp_rule->ir_zoneid != zoneid)
935dbed73cbSSangeeta Misra 			continue;
936dbed73cbSSangeeta Misra 		if (strcasecmp(tmp_rule->ir_name, name) == 0) {
937dbed73cbSSangeeta Misra 			mutex_enter(&tmp_rule->ir_lock);
938dbed73cbSSangeeta Misra 			if (tmp_rule->ir_flags & ILB_RULE_BUSY) {
939dbed73cbSSangeeta Misra 				mutex_exit(&tmp_rule->ir_lock);
940dbed73cbSSangeeta Misra 				*err = EINPROGRESS;
941dbed73cbSSangeeta Misra 				return (NULL);
942dbed73cbSSangeeta Misra 			}
943dbed73cbSSangeeta Misra 			tmp_rule->ir_refcnt++;
944dbed73cbSSangeeta Misra 			mutex_exit(&tmp_rule->ir_lock);
945dbed73cbSSangeeta Misra 			*err = 0;
946dbed73cbSSangeeta Misra 			return (tmp_rule);
947dbed73cbSSangeeta Misra 		}
948dbed73cbSSangeeta Misra 	}
949dbed73cbSSangeeta Misra 	*err = ENOENT;
950dbed73cbSSangeeta Misra 	return (NULL);
951dbed73cbSSangeeta Misra }
952dbed73cbSSangeeta Misra 
953dbed73cbSSangeeta Misra /* To find a rule with a given name and zone in the global rule list. */
954dbed73cbSSangeeta Misra ilb_rule_t *
ilb_find_rule(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name,int * err)955dbed73cbSSangeeta Misra ilb_find_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
956dbed73cbSSangeeta Misra     int *err)
957dbed73cbSSangeeta Misra {
958dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
959dbed73cbSSangeeta Misra 
960dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
961dbed73cbSSangeeta Misra 	tmp_rule = ilb_find_rule_locked(ilbs, zoneid, name, err);
962dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
963dbed73cbSSangeeta Misra 	return (tmp_rule);
964dbed73cbSSangeeta Misra }
965dbed73cbSSangeeta Misra 
966dbed73cbSSangeeta Misra /* Try to match the given packet info and zone ID with a rule. */
967dbed73cbSSangeeta Misra static boolean_t
ilb_match_rule(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name,int l3,int l4,in_port_t min_port,in_port_t max_port,const in6_addr_t * addr)968dbed73cbSSangeeta Misra ilb_match_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, int l3,
969dbed73cbSSangeeta Misra     int l4, in_port_t min_port, in_port_t max_port, const in6_addr_t *addr)
970dbed73cbSSangeeta Misra {
971dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
972dbed73cbSSangeeta Misra 
973dbed73cbSSangeeta Misra 	ASSERT(mutex_owned(&ilbs->ilbs_g_lock));
974dbed73cbSSangeeta Misra 
975dbed73cbSSangeeta Misra 	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
976dbed73cbSSangeeta Misra 	    tmp_rule = tmp_rule->ir_next) {
977dbed73cbSSangeeta Misra 		if (tmp_rule->ir_zoneid != zoneid)
978dbed73cbSSangeeta Misra 			continue;
979dbed73cbSSangeeta Misra 
980dbed73cbSSangeeta Misra 		/*
981dbed73cbSSangeeta Misra 		 * We don't allow the same name in different rules even if all
982dbed73cbSSangeeta Misra 		 * the other rule components are different.
983dbed73cbSSangeeta Misra 		 */
984dbed73cbSSangeeta Misra 		if (strcasecmp(tmp_rule->ir_name, name) == 0)
985dbed73cbSSangeeta Misra 			return (B_TRUE);
986dbed73cbSSangeeta Misra 
987dbed73cbSSangeeta Misra 		if (tmp_rule->ir_ipver != l3 || tmp_rule->ir_proto != l4)
988dbed73cbSSangeeta Misra 			continue;
989dbed73cbSSangeeta Misra 
990dbed73cbSSangeeta Misra 		/*
991dbed73cbSSangeeta Misra 		 * ir_min_port and ir_max_port are the same if ir_port_range
992dbed73cbSSangeeta Misra 		 * is false.  In this case, if the ir_min|max_port (same) is
993dbed73cbSSangeeta Misra 		 * outside of the given port range, it is OK.  In other cases,
994dbed73cbSSangeeta Misra 		 * check if min and max port are outside a rule's range.
995dbed73cbSSangeeta Misra 		 */
996dbed73cbSSangeeta Misra 		if (tmp_rule->ir_max_port < min_port ||
997dbed73cbSSangeeta Misra 		    tmp_rule->ir_min_port > max_port) {
998dbed73cbSSangeeta Misra 			continue;
999dbed73cbSSangeeta Misra 		}
1000dbed73cbSSangeeta Misra 
1001dbed73cbSSangeeta Misra 		/*
1002dbed73cbSSangeeta Misra 		 * If l3 is IPv4, the addr passed in is assumed to be
1003dbed73cbSSangeeta Misra 		 * mapped address.
1004dbed73cbSSangeeta Misra 		 */
1005dbed73cbSSangeeta Misra 		if (V6_OR_V4_INADDR_ANY(*addr) ||
1006dbed73cbSSangeeta Misra 		    V6_OR_V4_INADDR_ANY(tmp_rule->ir_target_v6) ||
1007dbed73cbSSangeeta Misra 		    IN6_ARE_ADDR_EQUAL(addr, &tmp_rule->ir_target_v6)) {
1008dbed73cbSSangeeta Misra 			return (B_TRUE);
1009dbed73cbSSangeeta Misra 		}
1010dbed73cbSSangeeta Misra 	}
1011dbed73cbSSangeeta Misra 	return (B_FALSE);
1012dbed73cbSSangeeta Misra }
1013dbed73cbSSangeeta Misra 
1014dbed73cbSSangeeta Misra int
ilb_rule_enable(ilb_stack_t * ilbs,zoneid_t zoneid,const char * rule_name,ilb_rule_t * in_rule)1015dbed73cbSSangeeta Misra ilb_rule_enable(ilb_stack_t *ilbs, zoneid_t zoneid,
1016dbed73cbSSangeeta Misra     const char *rule_name, ilb_rule_t *in_rule)
1017dbed73cbSSangeeta Misra {
1018dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1019dbed73cbSSangeeta Misra 	int err;
1020dbed73cbSSangeeta Misra 
1021dbed73cbSSangeeta Misra 	ASSERT((in_rule == NULL && rule_name != NULL) ||
1022dbed73cbSSangeeta Misra 	    (in_rule != NULL && rule_name == NULL));
1023dbed73cbSSangeeta Misra 	if ((rule = in_rule) == NULL) {
1024dbed73cbSSangeeta Misra 		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
1025dbed73cbSSangeeta Misra 		    &err)) == NULL) {
1026dbed73cbSSangeeta Misra 			return (err);
1027dbed73cbSSangeeta Misra 		}
1028dbed73cbSSangeeta Misra 	}
1029dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_lock);
1030dbed73cbSSangeeta Misra 	rule->ir_flags |= ILB_RULE_ENABLED;
1031dbed73cbSSangeeta Misra 	mutex_exit(&rule->ir_lock);
1032dbed73cbSSangeeta Misra 
1033dbed73cbSSangeeta Misra 	/* Only refrele if the rule is passed in. */
1034dbed73cbSSangeeta Misra 	if (in_rule == NULL)
1035dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1036dbed73cbSSangeeta Misra 	return (0);
1037dbed73cbSSangeeta Misra }
1038dbed73cbSSangeeta Misra 
1039dbed73cbSSangeeta Misra int
ilb_rule_disable(ilb_stack_t * ilbs,zoneid_t zoneid,const char * rule_name,ilb_rule_t * in_rule)1040dbed73cbSSangeeta Misra ilb_rule_disable(ilb_stack_t *ilbs, zoneid_t zoneid,
1041dbed73cbSSangeeta Misra     const char *rule_name, ilb_rule_t *in_rule)
1042dbed73cbSSangeeta Misra {
1043dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1044dbed73cbSSangeeta Misra 	int err;
1045dbed73cbSSangeeta Misra 
1046dbed73cbSSangeeta Misra 	ASSERT((in_rule == NULL && rule_name != NULL) ||
1047dbed73cbSSangeeta Misra 	    (in_rule != NULL && rule_name == NULL));
1048dbed73cbSSangeeta Misra 	if ((rule = in_rule) == NULL) {
1049dbed73cbSSangeeta Misra 		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
1050dbed73cbSSangeeta Misra 		    &err)) == NULL) {
1051dbed73cbSSangeeta Misra 			return (err);
1052dbed73cbSSangeeta Misra 		}
1053dbed73cbSSangeeta Misra 	}
1054dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_lock);
1055dbed73cbSSangeeta Misra 	rule->ir_flags &= ~ILB_RULE_ENABLED;
1056dbed73cbSSangeeta Misra 	mutex_exit(&rule->ir_lock);
1057dbed73cbSSangeeta Misra 
1058dbed73cbSSangeeta Misra 	/* Only refrele if the rule is passed in. */
1059dbed73cbSSangeeta Misra 	if (in_rule == NULL)
1060dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1061dbed73cbSSangeeta Misra 	return (0);
1062dbed73cbSSangeeta Misra }
1063dbed73cbSSangeeta Misra 
1064dbed73cbSSangeeta Misra /*
1065dbed73cbSSangeeta Misra  * XXX We should probably have a walker function to walk all rules.  For
1066dbed73cbSSangeeta Misra  * now, just add a simple loop for enable/disable/del.
1067dbed73cbSSangeeta Misra  */
1068dbed73cbSSangeeta Misra void
ilb_rule_enable_all(ilb_stack_t * ilbs,zoneid_t zoneid)1069dbed73cbSSangeeta Misra ilb_rule_enable_all(ilb_stack_t *ilbs, zoneid_t zoneid)
1070dbed73cbSSangeeta Misra {
1071dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1072dbed73cbSSangeeta Misra 
1073dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
1074dbed73cbSSangeeta Misra 	for (rule = ilbs->ilbs_rule_head; rule != NULL; rule = rule->ir_next) {
1075dbed73cbSSangeeta Misra 		if (rule->ir_zoneid != zoneid)
1076dbed73cbSSangeeta Misra 			continue;
1077dbed73cbSSangeeta Misra 		/*
1078dbed73cbSSangeeta Misra 		 * No need to hold the rule as we are holding the global
1079dbed73cbSSangeeta Misra 		 * lock so it won't go away.  Ignore the return value here
1080dbed73cbSSangeeta Misra 		 * as the rule is provided so the call cannot fail.
1081dbed73cbSSangeeta Misra 		 */
1082dbed73cbSSangeeta Misra 		(void) ilb_rule_enable(ilbs, zoneid, NULL, rule);
1083dbed73cbSSangeeta Misra 	}
1084dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
1085dbed73cbSSangeeta Misra }
1086dbed73cbSSangeeta Misra 
1087dbed73cbSSangeeta Misra void
ilb_rule_disable_all(ilb_stack_t * ilbs,zoneid_t zoneid)1088dbed73cbSSangeeta Misra ilb_rule_disable_all(ilb_stack_t *ilbs, zoneid_t zoneid)
1089dbed73cbSSangeeta Misra {
1090dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1091dbed73cbSSangeeta Misra 
1092dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
1093dbed73cbSSangeeta Misra 	for (rule = ilbs->ilbs_rule_head; rule != NULL;
1094dbed73cbSSangeeta Misra 	    rule = rule->ir_next) {
1095dbed73cbSSangeeta Misra 		if (rule->ir_zoneid != zoneid)
1096dbed73cbSSangeeta Misra 			continue;
1097dbed73cbSSangeeta Misra 		(void) ilb_rule_disable(ilbs, zoneid, NULL, rule);
1098dbed73cbSSangeeta Misra 	}
1099dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
1100dbed73cbSSangeeta Misra }
1101dbed73cbSSangeeta Misra 
1102dbed73cbSSangeeta Misra void
ilb_rule_del_all(ilb_stack_t * ilbs,zoneid_t zoneid)1103dbed73cbSSangeeta Misra ilb_rule_del_all(ilb_stack_t *ilbs, zoneid_t zoneid)
1104dbed73cbSSangeeta Misra {
1105dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1106dbed73cbSSangeeta Misra 	ilb_rule_tq_t *arg;
1107dbed73cbSSangeeta Misra 
1108dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
1109dbed73cbSSangeeta Misra 	while ((rule = ilbs->ilbs_rule_head) != NULL) {
1110dbed73cbSSangeeta Misra 		if (rule->ir_zoneid != zoneid)
1111dbed73cbSSangeeta Misra 			continue;
1112dbed73cbSSangeeta Misra 		ilb_rule_hash_del(rule);
1113dbed73cbSSangeeta Misra 		ilb_rule_g_del(ilbs, rule);
1114dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_g_lock);
1115dbed73cbSSangeeta Misra 
1116dbed73cbSSangeeta Misra 		arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP);
1117dbed73cbSSangeeta Misra 		arg->ilbs = ilbs;
1118dbed73cbSSangeeta Misra 		arg->rule = rule;
1119dbed73cbSSangeeta Misra 		(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq,
1120dbed73cbSSangeeta Misra 		    arg, TQ_SLEEP);
1121dbed73cbSSangeeta Misra 
1122dbed73cbSSangeeta Misra 		mutex_enter(&ilbs->ilbs_g_lock);
1123dbed73cbSSangeeta Misra 	}
1124dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
1125dbed73cbSSangeeta Misra }
1126dbed73cbSSangeeta Misra 
1127dbed73cbSSangeeta Misra /*
1128dbed73cbSSangeeta Misra  * This is just an optimization, so don't grab the global lock.  The
1129dbed73cbSSangeeta Misra  * worst case is that we missed a couple packets.
1130dbed73cbSSangeeta Misra  */
1131dbed73cbSSangeeta Misra boolean_t
ilb_has_rules(ilb_stack_t * ilbs)1132dbed73cbSSangeeta Misra ilb_has_rules(ilb_stack_t *ilbs)
1133dbed73cbSSangeeta Misra {
1134dbed73cbSSangeeta Misra 	return (ilbs->ilbs_rule_head != NULL);
1135dbed73cbSSangeeta Misra }
1136dbed73cbSSangeeta Misra 
1137dbed73cbSSangeeta Misra 
1138dbed73cbSSangeeta Misra static int
ilb_server_toggle(ilb_stack_t * ilbs,zoneid_t zoneid,const char * rule_name,ilb_rule_t * rule,in6_addr_t * addr,boolean_t enable)1139dbed73cbSSangeeta Misra ilb_server_toggle(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name,
1140dbed73cbSSangeeta Misra     ilb_rule_t *rule, in6_addr_t *addr, boolean_t enable)
1141dbed73cbSSangeeta Misra {
1142dbed73cbSSangeeta Misra 	ilb_server_t *tmp_server;
1143dbed73cbSSangeeta Misra 	int ret;
1144dbed73cbSSangeeta Misra 
1145dbed73cbSSangeeta Misra 	ASSERT((rule == NULL && rule_name != NULL) ||
1146dbed73cbSSangeeta Misra 	    (rule != NULL && rule_name == NULL));
1147dbed73cbSSangeeta Misra 
1148dbed73cbSSangeeta Misra 	if (rule == NULL) {
1149dbed73cbSSangeeta Misra 		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
1150dbed73cbSSangeeta Misra 		    &ret)) == NULL) {
1151dbed73cbSSangeeta Misra 			return (ret);
1152dbed73cbSSangeeta Misra 		}
1153dbed73cbSSangeeta Misra 	}
1154dbed73cbSSangeeta Misra 
1155dbed73cbSSangeeta Misra 	/* Once we get a hold on the rule, no server can be added/deleted. */
1156dbed73cbSSangeeta Misra 	for (tmp_server = rule->ir_servers; tmp_server != NULL;
1157dbed73cbSSangeeta Misra 	    tmp_server = tmp_server->iser_next) {
1158dbed73cbSSangeeta Misra 		if (IN6_ARE_ADDR_EQUAL(&tmp_server->iser_addr_v6, addr))
1159dbed73cbSSangeeta Misra 			break;
1160dbed73cbSSangeeta Misra 	}
1161dbed73cbSSangeeta Misra 	if (tmp_server == NULL) {
1162dbed73cbSSangeeta Misra 		ret = ENOENT;
1163dbed73cbSSangeeta Misra 		goto done;
1164dbed73cbSSangeeta Misra 	}
1165dbed73cbSSangeeta Misra 
1166dbed73cbSSangeeta Misra 	if (enable) {
1167dbed73cbSSangeeta Misra 		ret = rule->ir_alg->ilb_alg_server_enable(tmp_server,
1168dbed73cbSSangeeta Misra 		    rule->ir_alg->ilb_alg_data);
1169dbed73cbSSangeeta Misra 		if (ret == 0) {
1170dbed73cbSSangeeta Misra 			tmp_server->iser_enabled = B_TRUE;
1171dbed73cbSSangeeta Misra 			tmp_server->iser_die_time = 0;
1172dbed73cbSSangeeta Misra 		}
1173dbed73cbSSangeeta Misra 	} else {
1174dbed73cbSSangeeta Misra 		ret = rule->ir_alg->ilb_alg_server_disable(tmp_server,
1175dbed73cbSSangeeta Misra 		    rule->ir_alg->ilb_alg_data);
1176dbed73cbSSangeeta Misra 		if (ret == 0) {
1177dbed73cbSSangeeta Misra 			tmp_server->iser_enabled = B_FALSE;
1178dbed73cbSSangeeta Misra 			if (rule->ir_conn_drain_timeout != 0) {
1179dbed73cbSSangeeta Misra 				(void) atomic_swap_64(
1180dbed73cbSSangeeta Misra 				    (uint64_t *)&tmp_server->iser_die_time,
1181d3d50737SRafael Vanoni 				    ddi_get_lbolt64() + SEC_TO_TICK(
1182dbed73cbSSangeeta Misra 				    rule->ir_conn_drain_timeout));
1183dbed73cbSSangeeta Misra 			}
1184dbed73cbSSangeeta Misra 		}
1185dbed73cbSSangeeta Misra 	}
1186dbed73cbSSangeeta Misra 
1187dbed73cbSSangeeta Misra done:
1188dbed73cbSSangeeta Misra 	if (rule_name != NULL)
1189dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1190dbed73cbSSangeeta Misra 	return (ret);
1191dbed73cbSSangeeta Misra }
1192dbed73cbSSangeeta Misra int
ilb_server_enable(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name,ilb_rule_t * rule,in6_addr_t * addr)1193dbed73cbSSangeeta Misra ilb_server_enable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
1194dbed73cbSSangeeta Misra     ilb_rule_t *rule, in6_addr_t *addr)
1195dbed73cbSSangeeta Misra {
1196dbed73cbSSangeeta Misra 	return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_TRUE));
1197dbed73cbSSangeeta Misra }
1198dbed73cbSSangeeta Misra 
1199dbed73cbSSangeeta Misra int
ilb_server_disable(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name,ilb_rule_t * rule,in6_addr_t * addr)1200dbed73cbSSangeeta Misra ilb_server_disable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
1201dbed73cbSSangeeta Misra     ilb_rule_t *rule, in6_addr_t *addr)
1202dbed73cbSSangeeta Misra {
1203dbed73cbSSangeeta Misra 	return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_FALSE));
1204dbed73cbSSangeeta Misra }
1205dbed73cbSSangeeta Misra 
1206dbed73cbSSangeeta Misra /*
1207dbed73cbSSangeeta Misra  * Add a back end server to a rule.  If the address is IPv4, it is assumed
1208dbed73cbSSangeeta Misra  * to be passed in as a mapped address.
1209dbed73cbSSangeeta Misra  */
1210dbed73cbSSangeeta Misra int
ilb_server_add(ilb_stack_t * ilbs,ilb_rule_t * rule,ilb_server_info_t * info)1211dbed73cbSSangeeta Misra ilb_server_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_info_t *info)
1212dbed73cbSSangeeta Misra {
1213dbed73cbSSangeeta Misra 	ilb_server_t	*server;
1214dbed73cbSSangeeta Misra 	netstackid_t	stackid;
1215dbed73cbSSangeeta Misra 	int		ret = 0;
1216dbed73cbSSangeeta Misra 	in_port_t	min_port, max_port;
1217dbed73cbSSangeeta Misra 	in_port_t	range;
1218dbed73cbSSangeeta Misra 
1219dbed73cbSSangeeta Misra 	/* Port is passed in network byte order. */
1220dbed73cbSSangeeta Misra 	min_port = ntohs(info->min_port);
1221dbed73cbSSangeeta Misra 	max_port = ntohs(info->max_port);
1222dbed73cbSSangeeta Misra 	if (min_port > max_port)
1223dbed73cbSSangeeta Misra 		return (EINVAL);
1224dbed73cbSSangeeta Misra 
1225dbed73cbSSangeeta Misra 	/* min_port == 0 means "all ports". Make it so */
1226dbed73cbSSangeeta Misra 	if (min_port == 0) {
1227dbed73cbSSangeeta Misra 		min_port = 1;
1228dbed73cbSSangeeta Misra 		max_port = 65535;
1229dbed73cbSSangeeta Misra 	}
1230dbed73cbSSangeeta Misra 	range = max_port - min_port;
1231dbed73cbSSangeeta Misra 
1232dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_lock);
1233dbed73cbSSangeeta Misra 	/* If someone is already doing server add/del, sleeps and wait. */
1234dbed73cbSSangeeta Misra 	while (rule->ir_flags & ILB_RULE_BUSY) {
1235dbed73cbSSangeeta Misra 		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
1236dbed73cbSSangeeta Misra 			mutex_exit(&rule->ir_lock);
1237dbed73cbSSangeeta Misra 			return (EINTR);
1238dbed73cbSSangeeta Misra 		}
1239dbed73cbSSangeeta Misra 	}
1240dbed73cbSSangeeta Misra 
1241dbed73cbSSangeeta Misra 	/*
1242dbed73cbSSangeeta Misra 	 * Set the rule to be busy to make sure that no new packet can
1243dbed73cbSSangeeta Misra 	 * use this rule.
1244dbed73cbSSangeeta Misra 	 */
1245dbed73cbSSangeeta Misra 	rule->ir_flags |= ILB_RULE_BUSY;
1246dbed73cbSSangeeta Misra 
1247dbed73cbSSangeeta Misra 	/* Now wait for all other guys to finish their work. */
1248dbed73cbSSangeeta Misra 	while (rule->ir_refcnt > 2) {
1249dbed73cbSSangeeta Misra 		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
1250dbed73cbSSangeeta Misra 			mutex_exit(&rule->ir_lock);
1251dbed73cbSSangeeta Misra 			ret = EINTR;
1252dbed73cbSSangeeta Misra 			goto end;
1253dbed73cbSSangeeta Misra 		}
1254dbed73cbSSangeeta Misra 	}
1255dbed73cbSSangeeta Misra 	mutex_exit(&rule->ir_lock);
1256dbed73cbSSangeeta Misra 
1257dbed73cbSSangeeta Misra 	/* Sanity checks... */
1258dbed73cbSSangeeta Misra 	if ((IN6_IS_ADDR_V4MAPPED(&info->addr) &&
1259dbed73cbSSangeeta Misra 	    rule->ir_ipver != IPPROTO_IP) ||
1260dbed73cbSSangeeta Misra 	    (!IN6_IS_ADDR_V4MAPPED(&info->addr) &&
1261dbed73cbSSangeeta Misra 	    rule->ir_ipver != IPPROTO_IPV6)) {
1262dbed73cbSSangeeta Misra 		ret = EINVAL;
1263dbed73cbSSangeeta Misra 		goto end;
1264dbed73cbSSangeeta Misra 	}
1265dbed73cbSSangeeta Misra 
1266dbed73cbSSangeeta Misra 	/*
1267dbed73cbSSangeeta Misra 	 * Check for valid port range.
1268dbed73cbSSangeeta Misra 	 *
1269dbed73cbSSangeeta Misra 	 * For DSR, there can be no port shifting.  Hence the server
1270dbed73cbSSangeeta Misra 	 * specification must be the same as the rule's.
1271dbed73cbSSangeeta Misra 	 *
1272dbed73cbSSangeeta Misra 	 * For half-NAT/NAT, the range must either be 0 (port collapsing) or
1273dbed73cbSSangeeta Misra 	 * it must be equal to the same value as the rule port range.
1274dbed73cbSSangeeta Misra 	 *
1275dbed73cbSSangeeta Misra 	 */
1276dbed73cbSSangeeta Misra 	if (rule->ir_topo == ILB_TOPO_IMPL_DSR) {
1277dbed73cbSSangeeta Misra 		if (rule->ir_max_port != max_port ||
1278dbed73cbSSangeeta Misra 		    rule->ir_min_port != min_port) {
1279dbed73cbSSangeeta Misra 			ret = EINVAL;
1280dbed73cbSSangeeta Misra 			goto end;
1281dbed73cbSSangeeta Misra 		}
1282dbed73cbSSangeeta Misra 	} else {
1283dbed73cbSSangeeta Misra 		if ((range != rule->ir_max_port - rule->ir_min_port) &&
1284dbed73cbSSangeeta Misra 		    range != 0) {
1285dbed73cbSSangeeta Misra 			ret = EINVAL;
1286dbed73cbSSangeeta Misra 			goto end;
1287dbed73cbSSangeeta Misra 		}
1288dbed73cbSSangeeta Misra 	}
1289dbed73cbSSangeeta Misra 
1290dbed73cbSSangeeta Misra 	/* Check for duplicate. */
1291dbed73cbSSangeeta Misra 	for (server = rule->ir_servers; server != NULL;
1292dbed73cbSSangeeta Misra 	    server = server->iser_next) {
1293dbed73cbSSangeeta Misra 		if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, &info->addr) ||
1294dbed73cbSSangeeta Misra 		    strcasecmp(server->iser_name, info->name) == 0) {
1295dbed73cbSSangeeta Misra 			break;
1296dbed73cbSSangeeta Misra 		}
1297dbed73cbSSangeeta Misra 	}
1298dbed73cbSSangeeta Misra 	if (server != NULL) {
1299dbed73cbSSangeeta Misra 		ret = EEXIST;
1300dbed73cbSSangeeta Misra 		goto end;
1301dbed73cbSSangeeta Misra 	}
1302dbed73cbSSangeeta Misra 
1303dbed73cbSSangeeta Misra 	if ((server = kmem_zalloc(sizeof (ilb_server_t), KM_NOSLEEP)) == NULL) {
1304dbed73cbSSangeeta Misra 		ret = ENOMEM;
1305dbed73cbSSangeeta Misra 		goto end;
1306dbed73cbSSangeeta Misra 	}
1307dbed73cbSSangeeta Misra 
1308dbed73cbSSangeeta Misra 	(void) memcpy(server->iser_name, info->name, ILB_SERVER_NAMESZ - 1);
1309dbed73cbSSangeeta Misra 	(void) inet_ntop(AF_INET6, &info->addr, server->iser_ip_addr,
1310dbed73cbSSangeeta Misra 	    sizeof (server->iser_ip_addr));
1311dbed73cbSSangeeta Misra 	stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private;
1312dbed73cbSSangeeta Misra 	server->iser_ksp = ilb_server_kstat_init(stackid, rule, server);
1313dbed73cbSSangeeta Misra 	if (server->iser_ksp == NULL) {
1314dbed73cbSSangeeta Misra 		kmem_free(server, sizeof (ilb_server_t));
1315dbed73cbSSangeeta Misra 		ret = EINVAL;
1316dbed73cbSSangeeta Misra 		goto end;
1317dbed73cbSSangeeta Misra 	}
1318dbed73cbSSangeeta Misra 
1319dbed73cbSSangeeta Misra 	server->iser_stackid = stackid;
1320dbed73cbSSangeeta Misra 	server->iser_addr_v6 = info->addr;
1321dbed73cbSSangeeta Misra 	server->iser_min_port = min_port;
1322dbed73cbSSangeeta Misra 	server->iser_max_port = max_port;
1323dbed73cbSSangeeta Misra 	if (min_port != max_port)
1324dbed73cbSSangeeta Misra 		server->iser_port_range = B_TRUE;
1325dbed73cbSSangeeta Misra 	else
1326dbed73cbSSangeeta Misra 		server->iser_port_range = B_FALSE;
1327dbed73cbSSangeeta Misra 
1328dbed73cbSSangeeta Misra 	/*
1329dbed73cbSSangeeta Misra 	 * If the rule uses NAT, find/create the NAT source entry to use
1330dbed73cbSSangeeta Misra 	 * for this server.
1331dbed73cbSSangeeta Misra 	 */
1332dbed73cbSSangeeta Misra 	if (rule->ir_topo == ILB_TOPO_IMPL_NAT) {
1333dbed73cbSSangeeta Misra 		in_port_t port;
1334dbed73cbSSangeeta Misra 
1335dbed73cbSSangeeta Misra 		/*
1336dbed73cbSSangeeta Misra 		 * If the server uses a port range, our port allocation
1337dbed73cbSSangeeta Misra 		 * scheme needs to treat it as a wildcard.  Refer to the
1338dbed73cbSSangeeta Misra 		 * comments in ilb_nat.c about the scheme.
1339dbed73cbSSangeeta Misra 		 */
1340dbed73cbSSangeeta Misra 		if (server->iser_port_range)
1341dbed73cbSSangeeta Misra 			port = 0;
1342dbed73cbSSangeeta Misra 		else
1343dbed73cbSSangeeta Misra 			port = server->iser_min_port;
1344dbed73cbSSangeeta Misra 
1345dbed73cbSSangeeta Misra 		if ((ret = ilb_create_nat_src(ilbs, &server->iser_nat_src,
1346dbed73cbSSangeeta Misra 		    &server->iser_addr_v6, port, &rule->ir_nat_src_start,
1347dbed73cbSSangeeta Misra 		    num_nat_src_v6(&rule->ir_nat_src_start,
1348dbed73cbSSangeeta Misra 		    &rule->ir_nat_src_end))) != 0) {
1349dbed73cbSSangeeta Misra 			kstat_delete_netstack(server->iser_ksp, stackid);
1350dbed73cbSSangeeta Misra 			kmem_free(server, sizeof (ilb_server_t));
1351dbed73cbSSangeeta Misra 			goto end;
1352dbed73cbSSangeeta Misra 		}
1353dbed73cbSSangeeta Misra 	}
1354dbed73cbSSangeeta Misra 
1355dbed73cbSSangeeta Misra 	/*
1356dbed73cbSSangeeta Misra 	 * The iser_lock is only used to protect iser_refcnt.  All the other
1357dbed73cbSSangeeta Misra 	 * fields in ilb_server_t should not change, except for iser_enabled.
1358dbed73cbSSangeeta Misra 	 * The worst thing that can happen if iser_enabled is messed up is
1359dbed73cbSSangeeta Misra 	 * that one or two packets may not be load balanced to a server
1360dbed73cbSSangeeta Misra 	 * correctly.
1361dbed73cbSSangeeta Misra 	 */
1362dbed73cbSSangeeta Misra 	server->iser_refcnt = 1;
1363dbed73cbSSangeeta Misra 	server->iser_enabled = info->flags & ILB_SERVER_ENABLED ? B_TRUE :
1364dbed73cbSSangeeta Misra 	    B_FALSE;
1365dbed73cbSSangeeta Misra 	mutex_init(&server->iser_lock, NULL, MUTEX_DEFAULT, NULL);
1366dbed73cbSSangeeta Misra 	cv_init(&server->iser_cv, NULL, CV_DEFAULT, NULL);
1367dbed73cbSSangeeta Misra 
1368dbed73cbSSangeeta Misra 	/* Let the load balancing algorithm know about the addition. */
1369dbed73cbSSangeeta Misra 	ASSERT(rule->ir_alg != NULL);
1370dbed73cbSSangeeta Misra 	if ((ret = rule->ir_alg->ilb_alg_server_add(server,
1371dbed73cbSSangeeta Misra 	    rule->ir_alg->ilb_alg_data)) != 0) {
1372dbed73cbSSangeeta Misra 		kstat_delete_netstack(server->iser_ksp, stackid);
1373dbed73cbSSangeeta Misra 		kmem_free(server, sizeof (ilb_server_t));
1374dbed73cbSSangeeta Misra 		goto end;
1375dbed73cbSSangeeta Misra 	}
1376dbed73cbSSangeeta Misra 
1377dbed73cbSSangeeta Misra 	/*
1378dbed73cbSSangeeta Misra 	 * No need to hold ir_lock since no other thread should manipulate
1379dbed73cbSSangeeta Misra 	 * the following fields until ILB_RULE_BUSY is cleared.
1380dbed73cbSSangeeta Misra 	 */
1381dbed73cbSSangeeta Misra 	if (rule->ir_servers == NULL) {
1382dbed73cbSSangeeta Misra 		server->iser_next = NULL;
1383dbed73cbSSangeeta Misra 	} else {
1384dbed73cbSSangeeta Misra 		server->iser_next = rule->ir_servers;
1385dbed73cbSSangeeta Misra 	}
1386dbed73cbSSangeeta Misra 	rule->ir_servers = server;
1387dbed73cbSSangeeta Misra 	ILB_R_KSTAT(rule, num_servers);
1388dbed73cbSSangeeta Misra 
1389dbed73cbSSangeeta Misra end:
1390dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_lock);
1391dbed73cbSSangeeta Misra 	rule->ir_flags &= ~ILB_RULE_BUSY;
1392dbed73cbSSangeeta Misra 	cv_signal(&rule->ir_cv);
1393dbed73cbSSangeeta Misra 	mutex_exit(&rule->ir_lock);
1394dbed73cbSSangeeta Misra 	return (ret);
1395dbed73cbSSangeeta Misra }
1396dbed73cbSSangeeta Misra 
1397dbed73cbSSangeeta Misra /* The routine executed by the delayed rule processing taskq. */
1398dbed73cbSSangeeta Misra static void
ilb_server_del_tq(void * arg)1399dbed73cbSSangeeta Misra ilb_server_del_tq(void *arg)
1400dbed73cbSSangeeta Misra {
1401dbed73cbSSangeeta Misra 	ilb_server_t *server = (ilb_server_t *)arg;
1402dbed73cbSSangeeta Misra 
1403dbed73cbSSangeeta Misra 	mutex_enter(&server->iser_lock);
1404dbed73cbSSangeeta Misra 	while (server->iser_refcnt > 1)
1405dbed73cbSSangeeta Misra 		cv_wait(&server->iser_cv, &server->iser_lock);
1406dbed73cbSSangeeta Misra 	kstat_delete_netstack(server->iser_ksp, server->iser_stackid);
1407dbed73cbSSangeeta Misra 	kmem_free(server, sizeof (ilb_server_t));
1408dbed73cbSSangeeta Misra }
1409dbed73cbSSangeeta Misra 
1410dbed73cbSSangeeta Misra /*
1411dbed73cbSSangeeta Misra  * Delete a back end server from a rule.  If the address is IPv4, it is assumed
1412dbed73cbSSangeeta Misra  * to be passed in as a mapped address.
1413dbed73cbSSangeeta Misra  */
1414dbed73cbSSangeeta Misra int
ilb_server_del(ilb_stack_t * ilbs,zoneid_t zoneid,const char * rule_name,ilb_rule_t * rule,in6_addr_t * addr)1415dbed73cbSSangeeta Misra ilb_server_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name,
1416dbed73cbSSangeeta Misra     ilb_rule_t *rule, in6_addr_t *addr)
1417dbed73cbSSangeeta Misra {
1418dbed73cbSSangeeta Misra 	ilb_server_t	*server;
1419dbed73cbSSangeeta Misra 	ilb_server_t	*prev_server;
1420dbed73cbSSangeeta Misra 	int		ret = 0;
1421dbed73cbSSangeeta Misra 
1422dbed73cbSSangeeta Misra 	ASSERT((rule == NULL && rule_name != NULL) ||
1423dbed73cbSSangeeta Misra 	    (rule != NULL && rule_name == NULL));
1424dbed73cbSSangeeta Misra 	if (rule == NULL) {
1425dbed73cbSSangeeta Misra 		if ((rule = ilb_find_rule(ilbs, zoneid, rule_name,
1426dbed73cbSSangeeta Misra 		    &ret)) == NULL) {
1427dbed73cbSSangeeta Misra 			return (ret);
1428dbed73cbSSangeeta Misra 		}
1429dbed73cbSSangeeta Misra 	}
1430dbed73cbSSangeeta Misra 
1431dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_lock);
1432dbed73cbSSangeeta Misra 	/* If someone is already doing server add/del, sleeps and wait. */
1433dbed73cbSSangeeta Misra 	while (rule->ir_flags & ILB_RULE_BUSY) {
1434dbed73cbSSangeeta Misra 		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
1435dbed73cbSSangeeta Misra 			if (rule_name != NULL) {
1436dbed73cbSSangeeta Misra 				if (--rule->ir_refcnt <= 2)
1437dbed73cbSSangeeta Misra 					cv_signal(&rule->ir_cv);
1438dbed73cbSSangeeta Misra 			}
1439dbed73cbSSangeeta Misra 			mutex_exit(&rule->ir_lock);
1440dbed73cbSSangeeta Misra 			return (EINTR);
1441dbed73cbSSangeeta Misra 		}
1442dbed73cbSSangeeta Misra 	}
1443dbed73cbSSangeeta Misra 	/*
1444dbed73cbSSangeeta Misra 	 * Set the rule to be busy to make sure that no new packet can
1445dbed73cbSSangeeta Misra 	 * use this rule.
1446dbed73cbSSangeeta Misra 	 */
1447dbed73cbSSangeeta Misra 	rule->ir_flags |= ILB_RULE_BUSY;
1448dbed73cbSSangeeta Misra 
1449dbed73cbSSangeeta Misra 	/* Now wait for all other guys to finish their work. */
1450dbed73cbSSangeeta Misra 	while (rule->ir_refcnt > 2) {
1451dbed73cbSSangeeta Misra 		if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) {
1452dbed73cbSSangeeta Misra 			mutex_exit(&rule->ir_lock);
1453dbed73cbSSangeeta Misra 			ret = EINTR;
1454dbed73cbSSangeeta Misra 			goto end;
1455dbed73cbSSangeeta Misra 		}
1456dbed73cbSSangeeta Misra 	}
1457dbed73cbSSangeeta Misra 	mutex_exit(&rule->ir_lock);
1458dbed73cbSSangeeta Misra 
1459dbed73cbSSangeeta Misra 	prev_server = NULL;
1460dbed73cbSSangeeta Misra 	for (server = rule->ir_servers; server != NULL;
1461dbed73cbSSangeeta Misra 	    prev_server = server, server = server->iser_next) {
1462dbed73cbSSangeeta Misra 		if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, addr))
1463dbed73cbSSangeeta Misra 			break;
1464dbed73cbSSangeeta Misra 	}
1465dbed73cbSSangeeta Misra 	if (server == NULL) {
1466dbed73cbSSangeeta Misra 		ret = ENOENT;
1467dbed73cbSSangeeta Misra 		goto end;
1468dbed73cbSSangeeta Misra 	}
1469dbed73cbSSangeeta Misra 
1470dbed73cbSSangeeta Misra 	/*
1471dbed73cbSSangeeta Misra 	 * Let the load balancing algorithm know about the removal.
1472dbed73cbSSangeeta Misra 	 * The algorithm may disallow the removal...
1473dbed73cbSSangeeta Misra 	 */
1474dbed73cbSSangeeta Misra 	if ((ret = rule->ir_alg->ilb_alg_server_del(server,
1475dbed73cbSSangeeta Misra 	    rule->ir_alg->ilb_alg_data)) != 0) {
1476dbed73cbSSangeeta Misra 		goto end;
1477dbed73cbSSangeeta Misra 	}
1478dbed73cbSSangeeta Misra 
1479dbed73cbSSangeeta Misra 	if (prev_server == NULL)
1480dbed73cbSSangeeta Misra 		rule->ir_servers = server->iser_next;
1481dbed73cbSSangeeta Misra 	else
1482dbed73cbSSangeeta Misra 		prev_server->iser_next = server->iser_next;
1483dbed73cbSSangeeta Misra 
1484dbed73cbSSangeeta Misra 	ILB_R_KSTAT_UPDATE(rule, num_servers, -1);
1485dbed73cbSSangeeta Misra 
1486dbed73cbSSangeeta Misra 	/*
1487dbed73cbSSangeeta Misra 	 * Mark the server as disabled so that if there is any sticky cache
1488dbed73cbSSangeeta Misra 	 * using this server around, it won't be used.
1489dbed73cbSSangeeta Misra 	 */
1490dbed73cbSSangeeta Misra 	server->iser_enabled = B_FALSE;
1491dbed73cbSSangeeta Misra 
1492dbed73cbSSangeeta Misra 	mutex_enter(&server->iser_lock);
1493dbed73cbSSangeeta Misra 
1494dbed73cbSSangeeta Misra 	/*
1495dbed73cbSSangeeta Misra 	 * De-allocate the NAT source array.  The indiviual ilb_nat_src_entry_t
1496dbed73cbSSangeeta Misra 	 * may not go away if there is still a conn using it.  The NAT source
1497dbed73cbSSangeeta Misra 	 * timer will do the garbage collection.
1498dbed73cbSSangeeta Misra 	 */
1499dbed73cbSSangeeta Misra 	ilb_destroy_nat_src(&server->iser_nat_src);
1500dbed73cbSSangeeta Misra 
1501dbed73cbSSangeeta Misra 	/* If there is a hard limit on when a server should die, set it. */
1502dbed73cbSSangeeta Misra 	if (rule->ir_conn_drain_timeout != 0) {
1503dbed73cbSSangeeta Misra 		(void) atomic_swap_64((uint64_t *)&server->iser_die_time,
1504d3d50737SRafael Vanoni 		    ddi_get_lbolt64() +
1505d3d50737SRafael Vanoni 		    SEC_TO_TICK(rule->ir_conn_drain_timeout));
1506dbed73cbSSangeeta Misra 	}
1507dbed73cbSSangeeta Misra 
1508dbed73cbSSangeeta Misra 	if (server->iser_refcnt > 1) {
1509dbed73cbSSangeeta Misra 		(void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_server_del_tq,
1510dbed73cbSSangeeta Misra 		    server, TQ_SLEEP);
1511dbed73cbSSangeeta Misra 		mutex_exit(&server->iser_lock);
1512dbed73cbSSangeeta Misra 	} else {
1513dbed73cbSSangeeta Misra 		kstat_delete_netstack(server->iser_ksp, server->iser_stackid);
1514dbed73cbSSangeeta Misra 		kmem_free(server, sizeof (ilb_server_t));
1515dbed73cbSSangeeta Misra 	}
1516dbed73cbSSangeeta Misra 
1517dbed73cbSSangeeta Misra end:
1518dbed73cbSSangeeta Misra 	mutex_enter(&rule->ir_lock);
1519dbed73cbSSangeeta Misra 	rule->ir_flags &= ~ILB_RULE_BUSY;
1520dbed73cbSSangeeta Misra 	if (rule_name != NULL)
1521dbed73cbSSangeeta Misra 		rule->ir_refcnt--;
1522dbed73cbSSangeeta Misra 	cv_signal(&rule->ir_cv);
1523dbed73cbSSangeeta Misra 	mutex_exit(&rule->ir_lock);
1524dbed73cbSSangeeta Misra 	return (ret);
1525dbed73cbSSangeeta Misra }
1526dbed73cbSSangeeta Misra 
1527dbed73cbSSangeeta Misra /*
1528dbed73cbSSangeeta Misra  * First check if the destination of the ICMP message matches a VIP of
1529dbed73cbSSangeeta Misra  * a rule.  If it does not, just return ILB_PASSED.
1530dbed73cbSSangeeta Misra  *
1531dbed73cbSSangeeta Misra  * If the destination matches a VIP:
1532dbed73cbSSangeeta Misra  *
1533dbed73cbSSangeeta Misra  * For ICMP_ECHO_REQUEST, generate a response on behalf of the back end
1534dbed73cbSSangeeta Misra  * server.
1535dbed73cbSSangeeta Misra  *
1536dbed73cbSSangeeta Misra  * For ICMP_DEST_UNREACHABLE fragmentation needed, check inside the payload
1537dbed73cbSSangeeta Misra  * and see which back end server we should send this message to.  And we
1538dbed73cbSSangeeta Misra  * need to do NAT on both the payload message and the outside IP packet.
1539dbed73cbSSangeeta Misra  *
1540dbed73cbSSangeeta Misra  * For other ICMP messages, drop them.
1541dbed73cbSSangeeta Misra  */
1542dbed73cbSSangeeta Misra /* ARGSUSED */
1543dbed73cbSSangeeta Misra static int
ilb_icmp_v4(ilb_stack_t * ilbs,ill_t * ill,mblk_t * mp,ipha_t * ipha,icmph_t * icmph,ipaddr_t * lb_dst)1544dbed73cbSSangeeta Misra ilb_icmp_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha,
1545dbed73cbSSangeeta Misra     icmph_t *icmph, ipaddr_t *lb_dst)
1546dbed73cbSSangeeta Misra {
1547dbed73cbSSangeeta Misra 	ipaddr_t vip;
1548dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1549dbed73cbSSangeeta Misra 	in6_addr_t addr6;
1550dbed73cbSSangeeta Misra 
1551dbed73cbSSangeeta Misra 	if (!ilb_rule_match_vip_v4(ilbs, ipha->ipha_dst, &rule))
1552dbed73cbSSangeeta Misra 		return (ILB_PASSED);
1553dbed73cbSSangeeta Misra 
1554dbed73cbSSangeeta Misra 
1555dbed73cbSSangeeta Misra 	if ((uint8_t *)icmph + sizeof (icmph_t) > mp->b_wptr) {
1556dbed73cbSSangeeta Misra 		ILB_R_KSTAT(rule, icmp_dropped);
1557dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1558dbed73cbSSangeeta Misra 		return (ILB_DROPPED);
1559dbed73cbSSangeeta Misra 	}
1560dbed73cbSSangeeta Misra 
1561dbed73cbSSangeeta Misra 	switch (icmph->icmph_type) {
1562dbed73cbSSangeeta Misra 	case ICMP_ECHO_REQUEST:
1563dbed73cbSSangeeta Misra 		ILB_R_KSTAT(rule, icmp_echo_processed);
1564dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1565dbed73cbSSangeeta Misra 
1566dbed73cbSSangeeta Misra 		icmph->icmph_type = ICMP_ECHO_REPLY;
1567dbed73cbSSangeeta Misra 		icmph->icmph_checksum = 0;
1568dbed73cbSSangeeta Misra 		icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(ipha), 0);
1569dbed73cbSSangeeta Misra 		ipha->ipha_ttl =
1570dbed73cbSSangeeta Misra 		    ilbs->ilbs_netstack->netstack_ip->ips_ip_def_ttl;
1571dbed73cbSSangeeta Misra 		*lb_dst = ipha->ipha_src;
1572dbed73cbSSangeeta Misra 		vip = ipha->ipha_dst;
1573dbed73cbSSangeeta Misra 		ipha->ipha_dst = ipha->ipha_src;
1574dbed73cbSSangeeta Misra 		ipha->ipha_src = vip;
1575dbed73cbSSangeeta Misra 		return (ILB_BALANCED);
1576dbed73cbSSangeeta Misra 	case ICMP_DEST_UNREACHABLE: {
1577dbed73cbSSangeeta Misra 		int ret;
1578dbed73cbSSangeeta Misra 
1579dbed73cbSSangeeta Misra 		if (icmph->icmph_code != ICMP_FRAGMENTATION_NEEDED) {
1580dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, icmp_dropped);
1581dbed73cbSSangeeta Misra 			ILB_RULE_REFRELE(rule);
1582dbed73cbSSangeeta Misra 			return (ILB_DROPPED);
1583dbed73cbSSangeeta Misra 		}
1584dbed73cbSSangeeta Misra 		if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IP, ipha, icmph,
1585dbed73cbSSangeeta Misra 		    &addr6)) {
1586dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, icmp_2big_processed);
1587dbed73cbSSangeeta Misra 			ret = ILB_BALANCED;
1588dbed73cbSSangeeta Misra 		} else {
1589dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, icmp_2big_dropped);
1590dbed73cbSSangeeta Misra 			ret = ILB_DROPPED;
1591dbed73cbSSangeeta Misra 		}
1592dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1593dbed73cbSSangeeta Misra 		IN6_V4MAPPED_TO_IPADDR(&addr6, *lb_dst);
1594dbed73cbSSangeeta Misra 		return (ret);
1595dbed73cbSSangeeta Misra 	}
1596dbed73cbSSangeeta Misra 	default:
1597dbed73cbSSangeeta Misra 		ILB_R_KSTAT(rule, icmp_dropped);
1598dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1599dbed73cbSSangeeta Misra 		return (ILB_DROPPED);
1600dbed73cbSSangeeta Misra 	}
1601dbed73cbSSangeeta Misra }
1602dbed73cbSSangeeta Misra 
1603dbed73cbSSangeeta Misra /* ARGSUSED */
1604dbed73cbSSangeeta Misra static int
ilb_icmp_v6(ilb_stack_t * ilbs,ill_t * ill,mblk_t * mp,ip6_t * ip6h,icmp6_t * icmp6,in6_addr_t * lb_dst)1605dbed73cbSSangeeta Misra ilb_icmp_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h,
1606dbed73cbSSangeeta Misra     icmp6_t *icmp6, in6_addr_t *lb_dst)
1607dbed73cbSSangeeta Misra {
1608dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1609dbed73cbSSangeeta Misra 
1610dbed73cbSSangeeta Misra 	if (!ilb_rule_match_vip_v6(ilbs, &ip6h->ip6_dst, &rule))
1611dbed73cbSSangeeta Misra 		return (ILB_PASSED);
1612dbed73cbSSangeeta Misra 
1613dbed73cbSSangeeta Misra 	if ((uint8_t *)icmp6 + sizeof (icmp6_t) > mp->b_wptr) {
1614dbed73cbSSangeeta Misra 		ILB_R_KSTAT(rule, icmp_dropped);
1615dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1616dbed73cbSSangeeta Misra 		return (ILB_DROPPED);
1617dbed73cbSSangeeta Misra 	}
1618dbed73cbSSangeeta Misra 
1619dbed73cbSSangeeta Misra 	switch (icmp6->icmp6_type) {
1620dbed73cbSSangeeta Misra 	case ICMP6_ECHO_REQUEST: {
1621dbed73cbSSangeeta Misra 		int hdr_len;
1622dbed73cbSSangeeta Misra 
1623dbed73cbSSangeeta Misra 		ILB_R_KSTAT(rule, icmp_echo_processed);
1624dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1625dbed73cbSSangeeta Misra 
1626dbed73cbSSangeeta Misra 		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
1627dbed73cbSSangeeta Misra 		icmp6->icmp6_cksum = ip6h->ip6_plen;
1628dbed73cbSSangeeta Misra 		hdr_len = (char *)icmp6 - (char *)ip6h;
1629dbed73cbSSangeeta Misra 		icmp6->icmp6_cksum = IP_CSUM(mp, hdr_len,
1630dbed73cbSSangeeta Misra 		    ilb_pseudo_sum_v6(ip6h, IPPROTO_ICMPV6));
1631dbed73cbSSangeeta Misra 		ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
1632dbed73cbSSangeeta Misra 		ip6h->ip6_hops =
1633dbed73cbSSangeeta Misra 		    ilbs->ilbs_netstack->netstack_ip->ips_ipv6_def_hops;
1634dbed73cbSSangeeta Misra 		*lb_dst = ip6h->ip6_src;
1635dbed73cbSSangeeta Misra 		ip6h->ip6_src = ip6h->ip6_dst;
1636dbed73cbSSangeeta Misra 		ip6h->ip6_dst = *lb_dst;
1637dbed73cbSSangeeta Misra 		return (ILB_BALANCED);
1638dbed73cbSSangeeta Misra 	}
1639dbed73cbSSangeeta Misra 	case ICMP6_PACKET_TOO_BIG: {
1640dbed73cbSSangeeta Misra 		int ret;
1641dbed73cbSSangeeta Misra 
1642dbed73cbSSangeeta Misra 		if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IPV6, ip6h, icmp6,
1643dbed73cbSSangeeta Misra 		    lb_dst)) {
1644dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, icmp_2big_processed);
1645dbed73cbSSangeeta Misra 			ret = ILB_BALANCED;
1646dbed73cbSSangeeta Misra 		} else {
1647dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, icmp_2big_dropped);
1648dbed73cbSSangeeta Misra 			ret = ILB_DROPPED;
1649dbed73cbSSangeeta Misra 		}
1650dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1651dbed73cbSSangeeta Misra 		return (ret);
1652dbed73cbSSangeeta Misra 	}
1653dbed73cbSSangeeta Misra 	default:
1654dbed73cbSSangeeta Misra 		ILB_R_KSTAT(rule, icmp_dropped);
1655dbed73cbSSangeeta Misra 		ILB_RULE_REFRELE(rule);
1656dbed73cbSSangeeta Misra 		return (ILB_DROPPED);
1657dbed73cbSSangeeta Misra 	}
1658dbed73cbSSangeeta Misra }
1659dbed73cbSSangeeta Misra 
1660dbed73cbSSangeeta Misra /*
1661dbed73cbSSangeeta Misra  * Common routine to check an incoming packet and decide what to do with it.
1662dbed73cbSSangeeta Misra  * called by ilb_check_v4|v6().
1663dbed73cbSSangeeta Misra  */
1664dbed73cbSSangeeta Misra static int
ilb_check(ilb_stack_t * ilbs,ill_t * ill,mblk_t * mp,in6_addr_t * src,in6_addr_t * dst,int l3,int l4,void * iph,uint8_t * tph,uint32_t pkt_len,in6_addr_t * lb_dst)1665dbed73cbSSangeeta Misra ilb_check(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, in6_addr_t *src,
1666dbed73cbSSangeeta Misra     in6_addr_t *dst, int l3, int l4, void *iph, uint8_t *tph, uint32_t pkt_len,
1667dbed73cbSSangeeta Misra     in6_addr_t *lb_dst)
1668dbed73cbSSangeeta Misra {
1669dbed73cbSSangeeta Misra 	in_port_t		sport, dport;
1670dbed73cbSSangeeta Misra 	tcpha_t			*tcph;
1671dbed73cbSSangeeta Misra 	udpha_t			*udph;
1672dbed73cbSSangeeta Misra 	ilb_rule_t		*rule;
1673dbed73cbSSangeeta Misra 	ilb_server_t		*server;
1674dbed73cbSSangeeta Misra 	boolean_t		balanced;
1675dbed73cbSSangeeta Misra 	struct ilb_sticky_s	*s = NULL;
1676dbed73cbSSangeeta Misra 	int			ret;
1677dbed73cbSSangeeta Misra 	uint32_t		ip_sum, tp_sum;
1678dbed73cbSSangeeta Misra 	ilb_nat_info_t		info;
1679dbed73cbSSangeeta Misra 	uint16_t		nat_src_idx;
1680dbed73cbSSangeeta Misra 	boolean_t		busy;
1681dbed73cbSSangeeta Misra 
1682*ab82c29bSToomas Soome 	ret = 0;
1683*ab82c29bSToomas Soome 
1684dbed73cbSSangeeta Misra 	/*
1685dbed73cbSSangeeta Misra 	 * We don't really need to switch here since both protocols's
1686dbed73cbSSangeeta Misra 	 * ports are at the same offset.  Just prepare for future protocol
1687dbed73cbSSangeeta Misra 	 * specific processing.
1688dbed73cbSSangeeta Misra 	 */
1689dbed73cbSSangeeta Misra 	switch (l4) {
1690dbed73cbSSangeeta Misra 	case IPPROTO_TCP:
1691dbed73cbSSangeeta Misra 		if (tph + TCP_MIN_HEADER_LENGTH > mp->b_wptr)
1692dbed73cbSSangeeta Misra 			return (ILB_DROPPED);
1693dbed73cbSSangeeta Misra 		tcph = (tcpha_t *)tph;
1694dbed73cbSSangeeta Misra 		sport = tcph->tha_lport;
1695dbed73cbSSangeeta Misra 		dport = tcph->tha_fport;
1696dbed73cbSSangeeta Misra 		break;
1697dbed73cbSSangeeta Misra 	case IPPROTO_UDP:
1698dbed73cbSSangeeta Misra 		if (tph + sizeof (udpha_t) > mp->b_wptr)
1699dbed73cbSSangeeta Misra 			return (ILB_DROPPED);
1700dbed73cbSSangeeta Misra 		udph = (udpha_t *)tph;
1701dbed73cbSSangeeta Misra 		sport = udph->uha_src_port;
1702dbed73cbSSangeeta Misra 		dport = udph->uha_dst_port;
1703dbed73cbSSangeeta Misra 		break;
1704dbed73cbSSangeeta Misra 	default:
1705dbed73cbSSangeeta Misra 		return (ILB_PASSED);
1706dbed73cbSSangeeta Misra 	}
1707dbed73cbSSangeeta Misra 
1708dbed73cbSSangeeta Misra 	/* Fast path, there is an existing conn. */
1709dbed73cbSSangeeta Misra 	if (ilb_check_conn(ilbs, l3, iph, l4, tph, src, dst, sport, dport,
1710dbed73cbSSangeeta Misra 	    pkt_len, lb_dst)) {
1711dbed73cbSSangeeta Misra 		return (ILB_BALANCED);
1712dbed73cbSSangeeta Misra 	}
1713dbed73cbSSangeeta Misra 
1714dbed73cbSSangeeta Misra 	/*
1715dbed73cbSSangeeta Misra 	 * If there is no existing connection for the incoming packet, check
1716dbed73cbSSangeeta Misra 	 * to see if the packet matches a rule.  If not, just let IP decide
1717dbed73cbSSangeeta Misra 	 * what to do with it.
1718dbed73cbSSangeeta Misra 	 *
1719dbed73cbSSangeeta Misra 	 * Note: a reply from back end server should not match a rule.  A
1720dbed73cbSSangeeta Misra 	 * reply should match one existing conn.
1721dbed73cbSSangeeta Misra 	 */
1722dbed73cbSSangeeta Misra 	rule = ilb_rule_hash(ilbs, l3, l4, dst, dport, ill->ill_zoneid,
1723dbed73cbSSangeeta Misra 	    pkt_len, &busy);
1724dbed73cbSSangeeta Misra 	if (rule == NULL) {
1725dbed73cbSSangeeta Misra 		/* If the rule is busy, just drop the packet. */
1726dbed73cbSSangeeta Misra 		if (busy)
1727dbed73cbSSangeeta Misra 			return (ILB_DROPPED);
1728dbed73cbSSangeeta Misra 		else
1729dbed73cbSSangeeta Misra 			return (ILB_PASSED);
1730dbed73cbSSangeeta Misra 	}
1731dbed73cbSSangeeta Misra 
1732dbed73cbSSangeeta Misra 	/*
1733dbed73cbSSangeeta Misra 	 * The packet matches a rule, use the rule load balance algorithm
1734dbed73cbSSangeeta Misra 	 * to find a server.
1735dbed73cbSSangeeta Misra 	 */
1736dbed73cbSSangeeta Misra 	balanced = rule->ir_alg->ilb_alg_lb(src, sport, dst, dport,
1737dbed73cbSSangeeta Misra 	    rule->ir_alg->ilb_alg_data, &server);
1738dbed73cbSSangeeta Misra 	/*
1739dbed73cbSSangeeta Misra 	 * This can only happen if there is no server in a rule or all
1740dbed73cbSSangeeta Misra 	 * the servers are currently disabled.
1741dbed73cbSSangeeta Misra 	 */
1742dbed73cbSSangeeta Misra 	if (!balanced)
1743dbed73cbSSangeeta Misra 		goto no_server;
1744dbed73cbSSangeeta Misra 
1745dbed73cbSSangeeta Misra 	/*
1746dbed73cbSSangeeta Misra 	 * If the rule is sticky enabled, we need to check the sticky table.
1747dbed73cbSSangeeta Misra 	 * If there is a sticky entry for the client, use the previous server
1748dbed73cbSSangeeta Misra 	 * instead of the one found above (note that both can be the same).
1749dbed73cbSSangeeta Misra 	 * If there is no entry for that client, add an entry to the sticky
1750dbed73cbSSangeeta Misra 	 * table.  Both the find and add are done in ilb_sticky_find_add()
1751dbed73cbSSangeeta Misra 	 * to avoid checking for duplicate when adding an entry.
1752dbed73cbSSangeeta Misra 	 */
1753dbed73cbSSangeeta Misra 	if (rule->ir_flags & ILB_RULE_STICKY) {
1754dbed73cbSSangeeta Misra 		in6_addr_t addr;
1755dbed73cbSSangeeta Misra 
1756dbed73cbSSangeeta Misra 		V6_MASK_COPY(*src, rule->ir_sticky_mask, addr);
1757dbed73cbSSangeeta Misra 		if ((server = ilb_sticky_find_add(ilbs, rule, &addr, server,
1758dbed73cbSSangeeta Misra 		    &s, &nat_src_idx)) == NULL) {
1759dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, nomem_pkt_dropped);
1760dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len);
1761dbed73cbSSangeeta Misra 			goto no_server;
1762dbed73cbSSangeeta Misra 		}
1763dbed73cbSSangeeta Misra 	}
1764dbed73cbSSangeeta Misra 
1765dbed73cbSSangeeta Misra 	/*
1766dbed73cbSSangeeta Misra 	 * We are holding a reference on the rule, so the server
1767dbed73cbSSangeeta Misra 	 * cannot go away.
1768dbed73cbSSangeeta Misra 	 */
1769dbed73cbSSangeeta Misra 	*lb_dst = server->iser_addr_v6;
1770dbed73cbSSangeeta Misra 	ILB_S_KSTAT(server, pkt_processed);
1771dbed73cbSSangeeta Misra 	ILB_S_KSTAT_UPDATE(server, bytes_processed, pkt_len);
1772dbed73cbSSangeeta Misra 
1773dbed73cbSSangeeta Misra 	switch (rule->ir_topo) {
1774dbed73cbSSangeeta Misra 	case ILB_TOPO_IMPL_NAT: {
1775dbed73cbSSangeeta Misra 		ilb_nat_src_entry_t	*src_ent;
1776dbed73cbSSangeeta Misra 		uint16_t		*src_idx;
1777dbed73cbSSangeeta Misra 
1778dbed73cbSSangeeta Misra 		/*
1779dbed73cbSSangeeta Misra 		 * We create a cache even if it is not a SYN segment.
1780dbed73cbSSangeeta Misra 		 * The server should return a RST.  When we see the
1781dbed73cbSSangeeta Misra 		 * RST, we will destroy this cache.  But by having
1782dbed73cbSSangeeta Misra 		 * a cache, we know how to NAT the returned RST.
1783dbed73cbSSangeeta Misra 		 */
1784dbed73cbSSangeeta Misra 		info.vip = *dst;
1785dbed73cbSSangeeta Misra 		info.dport = dport;
1786dbed73cbSSangeeta Misra 		info.src = *src;
1787dbed73cbSSangeeta Misra 		info.sport = sport;
1788dbed73cbSSangeeta Misra 
1789dbed73cbSSangeeta Misra 		/* If stickiness is enabled, use the same source address */
1790dbed73cbSSangeeta Misra 		if (s != NULL)
1791dbed73cbSSangeeta Misra 			src_idx = &nat_src_idx;
1792dbed73cbSSangeeta Misra 		else
1793dbed73cbSSangeeta Misra 			src_idx = NULL;
1794dbed73cbSSangeeta Misra 
1795dbed73cbSSangeeta Misra 		if ((src_ent = ilb_alloc_nat_addr(server->iser_nat_src,
1796dbed73cbSSangeeta Misra 		    &info.nat_src, &info.nat_sport, src_idx)) == NULL) {
1797dbed73cbSSangeeta Misra 			if (s != NULL)
1798dbed73cbSSangeeta Misra 				ilb_sticky_refrele(s);
1799dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, pkt_dropped);
1800dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
1801dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, noport_pkt_dropped);
1802dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, noport_bytes_dropped, pkt_len);
1803dbed73cbSSangeeta Misra 			ret = ILB_DROPPED;
1804dbed73cbSSangeeta Misra 			break;
1805dbed73cbSSangeeta Misra 		}
1806dbed73cbSSangeeta Misra 		info.src_ent = src_ent;
1807dbed73cbSSangeeta Misra 		info.nat_dst = server->iser_addr_v6;
1808dbed73cbSSangeeta Misra 		if (rule->ir_port_range && server->iser_port_range) {
1809dbed73cbSSangeeta Misra 			info.nat_dport = htons(ntohs(dport) -
1810dbed73cbSSangeeta Misra 			    rule->ir_min_port + server->iser_min_port);
1811dbed73cbSSangeeta Misra 		} else {
1812dbed73cbSSangeeta Misra 			info.nat_dport = htons(server->iser_min_port);
1813dbed73cbSSangeeta Misra 		}
1814dbed73cbSSangeeta Misra 
1815dbed73cbSSangeeta Misra 		/*
1816dbed73cbSSangeeta Misra 		 * If ilb_conn_add() fails, it will release the reference on
1817dbed73cbSSangeeta Misra 		 * sticky info and de-allocate the NAT source port allocated
1818dbed73cbSSangeeta Misra 		 * above.
1819dbed73cbSSangeeta Misra 		 */
1820dbed73cbSSangeeta Misra 		if (ilb_conn_add(ilbs, rule, server, src, sport, dst,
1821dbed73cbSSangeeta Misra 		    dport, &info, &ip_sum, &tp_sum, s) != 0) {
1822dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, pkt_dropped);
1823dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
1824dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, nomem_pkt_dropped);
1825dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len);
1826dbed73cbSSangeeta Misra 			ret = ILB_DROPPED;
1827dbed73cbSSangeeta Misra 			break;
1828dbed73cbSSangeeta Misra 		}
1829dbed73cbSSangeeta Misra 		ilb_full_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE);
1830dbed73cbSSangeeta Misra 		ret = ILB_BALANCED;
1831dbed73cbSSangeeta Misra 		break;
1832dbed73cbSSangeeta Misra 	}
1833dbed73cbSSangeeta Misra 	case ILB_TOPO_IMPL_HALF_NAT:
1834dbed73cbSSangeeta Misra 		info.vip = *dst;
1835dbed73cbSSangeeta Misra 		info.nat_dst = server->iser_addr_v6;
1836dbed73cbSSangeeta Misra 		info.dport = dport;
1837dbed73cbSSangeeta Misra 		if (rule->ir_port_range && server->iser_port_range) {
1838dbed73cbSSangeeta Misra 			info.nat_dport = htons(ntohs(dport) -
1839dbed73cbSSangeeta Misra 			    rule->ir_min_port + server->iser_min_port);
1840dbed73cbSSangeeta Misra 		} else {
1841dbed73cbSSangeeta Misra 			info.nat_dport = htons(server->iser_min_port);
1842dbed73cbSSangeeta Misra 		}
1843dbed73cbSSangeeta Misra 
1844dbed73cbSSangeeta Misra 		if (ilb_conn_add(ilbs, rule, server, src, sport, dst,
1845dbed73cbSSangeeta Misra 		    dport, &info, &ip_sum, &tp_sum, s) != 0) {
1846dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, pkt_dropped);
1847dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
1848dbed73cbSSangeeta Misra 			ILB_R_KSTAT(rule, nomem_pkt_dropped);
1849dbed73cbSSangeeta Misra 			ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len);
1850dbed73cbSSangeeta Misra 			ret = ILB_DROPPED;
1851dbed73cbSSangeeta Misra 			break;
1852dbed73cbSSangeeta Misra 		}
1853dbed73cbSSangeeta Misra 		ilb_half_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE);
1854dbed73cbSSangeeta Misra 
1855dbed73cbSSangeeta Misra 		ret = ILB_BALANCED;
1856dbed73cbSSangeeta Misra 		break;
1857dbed73cbSSangeeta Misra 	case ILB_TOPO_IMPL_DSR:
1858dbed73cbSSangeeta Misra 		/*
1859dbed73cbSSangeeta Misra 		 * By decrementing the sticky refcnt, the period of
1860dbed73cbSSangeeta Misra 		 * stickiness (life time of ilb_sticky_t) will be
1861dbed73cbSSangeeta Misra 		 * from now to (now + default expiry time).
1862dbed73cbSSangeeta Misra 		 */
1863dbed73cbSSangeeta Misra 		if (s != NULL)
1864dbed73cbSSangeeta Misra 			ilb_sticky_refrele(s);
1865dbed73cbSSangeeta Misra 		ret = ILB_BALANCED;
1866dbed73cbSSangeeta Misra 		break;
1867dbed73cbSSangeeta Misra 	default:
1868dbed73cbSSangeeta Misra 		cmn_err(CE_PANIC, "data corruption unknown topology: %p",
1869dbed73cbSSangeeta Misra 		    (void *) rule);
1870dbed73cbSSangeeta Misra 		break;
1871dbed73cbSSangeeta Misra 	}
1872dbed73cbSSangeeta Misra 	ILB_RULE_REFRELE(rule);
1873dbed73cbSSangeeta Misra 	return (ret);
1874dbed73cbSSangeeta Misra 
1875dbed73cbSSangeeta Misra no_server:
1876dbed73cbSSangeeta Misra 	/* This can only happen if there is no server available. */
1877dbed73cbSSangeeta Misra 	ILB_R_KSTAT(rule, pkt_dropped);
1878dbed73cbSSangeeta Misra 	ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len);
1879dbed73cbSSangeeta Misra 	ILB_RULE_REFRELE(rule);
1880dbed73cbSSangeeta Misra 	return (ILB_DROPPED);
1881dbed73cbSSangeeta Misra }
1882dbed73cbSSangeeta Misra 
1883dbed73cbSSangeeta Misra int
ilb_check_v4(ilb_stack_t * ilbs,ill_t * ill,mblk_t * mp,ipha_t * ipha,int l4,uint8_t * tph,ipaddr_t * lb_dst)1884dbed73cbSSangeeta Misra ilb_check_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha, int l4,
1885dbed73cbSSangeeta Misra     uint8_t *tph, ipaddr_t *lb_dst)
1886dbed73cbSSangeeta Misra {
1887dbed73cbSSangeeta Misra 	in6_addr_t v6_src, v6_dst, v6_lb_dst;
1888dbed73cbSSangeeta Misra 	int ret;
1889dbed73cbSSangeeta Misra 
1890dbed73cbSSangeeta Misra 	ASSERT(DB_REF(mp) == 1);
1891dbed73cbSSangeeta Misra 
1892dbed73cbSSangeeta Misra 	if (l4 == IPPROTO_ICMP) {
1893dbed73cbSSangeeta Misra 		return (ilb_icmp_v4(ilbs, ill, mp, ipha, (icmph_t *)tph,
1894dbed73cbSSangeeta Misra 		    lb_dst));
1895dbed73cbSSangeeta Misra 	}
1896dbed73cbSSangeeta Misra 
1897dbed73cbSSangeeta Misra 	IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &v6_src);
1898dbed73cbSSangeeta Misra 	IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &v6_dst);
1899dbed73cbSSangeeta Misra 	ret = ilb_check(ilbs, ill, mp, &v6_src, &v6_dst, IPPROTO_IP, l4, ipha,
1900dbed73cbSSangeeta Misra 	    tph, ntohs(ipha->ipha_length), &v6_lb_dst);
1901dbed73cbSSangeeta Misra 	if (ret == ILB_BALANCED)
1902dbed73cbSSangeeta Misra 		IN6_V4MAPPED_TO_IPADDR(&v6_lb_dst, *lb_dst);
1903dbed73cbSSangeeta Misra 	return (ret);
1904dbed73cbSSangeeta Misra }
1905dbed73cbSSangeeta Misra 
1906dbed73cbSSangeeta Misra int
ilb_check_v6(ilb_stack_t * ilbs,ill_t * ill,mblk_t * mp,ip6_t * ip6h,int l4,uint8_t * tph,in6_addr_t * lb_dst)1907dbed73cbSSangeeta Misra ilb_check_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h, int l4,
1908dbed73cbSSangeeta Misra     uint8_t *tph, in6_addr_t *lb_dst)
1909dbed73cbSSangeeta Misra {
1910dbed73cbSSangeeta Misra 	uint32_t pkt_len;
1911dbed73cbSSangeeta Misra 
1912dbed73cbSSangeeta Misra 	ASSERT(DB_REF(mp) == 1);
1913dbed73cbSSangeeta Misra 
1914dbed73cbSSangeeta Misra 	if (l4 == IPPROTO_ICMPV6) {
1915dbed73cbSSangeeta Misra 		return (ilb_icmp_v6(ilbs, ill, mp, ip6h, (icmp6_t *)tph,
1916dbed73cbSSangeeta Misra 		    lb_dst));
1917dbed73cbSSangeeta Misra 	}
1918dbed73cbSSangeeta Misra 
1919dbed73cbSSangeeta Misra 	pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1920dbed73cbSSangeeta Misra 	return (ilb_check(ilbs, ill, mp, &ip6h->ip6_src, &ip6h->ip6_dst,
1921dbed73cbSSangeeta Misra 	    IPPROTO_IPV6, l4, ip6h, tph, pkt_len, lb_dst));
1922dbed73cbSSangeeta Misra }
1923dbed73cbSSangeeta Misra 
1924dbed73cbSSangeeta Misra void
ilb_get_num_rules(ilb_stack_t * ilbs,zoneid_t zoneid,uint32_t * num_rules)1925dbed73cbSSangeeta Misra ilb_get_num_rules(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_rules)
1926dbed73cbSSangeeta Misra {
1927dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
1928dbed73cbSSangeeta Misra 
1929dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
1930dbed73cbSSangeeta Misra 	*num_rules = 0;
1931dbed73cbSSangeeta Misra 	for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
1932dbed73cbSSangeeta Misra 	    tmp_rule = tmp_rule->ir_next) {
1933dbed73cbSSangeeta Misra 		if (tmp_rule->ir_zoneid == zoneid)
1934dbed73cbSSangeeta Misra 			*num_rules += 1;
1935dbed73cbSSangeeta Misra 	}
1936dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
1937dbed73cbSSangeeta Misra }
1938dbed73cbSSangeeta Misra 
1939dbed73cbSSangeeta Misra int
ilb_get_num_servers(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name,uint32_t * num_servers)1940dbed73cbSSangeeta Misra ilb_get_num_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
1941dbed73cbSSangeeta Misra     uint32_t *num_servers)
1942dbed73cbSSangeeta Misra {
1943dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1944dbed73cbSSangeeta Misra 	int err;
1945dbed73cbSSangeeta Misra 
1946dbed73cbSSangeeta Misra 	if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL)
1947dbed73cbSSangeeta Misra 		return (err);
1948dbed73cbSSangeeta Misra 	*num_servers = rule->ir_kstat.num_servers.value.ui64;
1949dbed73cbSSangeeta Misra 	ILB_RULE_REFRELE(rule);
1950dbed73cbSSangeeta Misra 	return (0);
1951dbed73cbSSangeeta Misra }
1952dbed73cbSSangeeta Misra 
1953dbed73cbSSangeeta Misra int
ilb_get_servers(ilb_stack_t * ilbs,zoneid_t zoneid,const char * name,ilb_server_info_t * servers,uint32_t * num_servers)1954dbed73cbSSangeeta Misra ilb_get_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name,
1955dbed73cbSSangeeta Misra     ilb_server_info_t *servers, uint32_t *num_servers)
1956dbed73cbSSangeeta Misra {
1957dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
1958dbed73cbSSangeeta Misra 	ilb_server_t *server;
1959dbed73cbSSangeeta Misra 	size_t cnt;
1960dbed73cbSSangeeta Misra 	int err;
1961dbed73cbSSangeeta Misra 
1962dbed73cbSSangeeta Misra 	if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL)
1963dbed73cbSSangeeta Misra 		return (err);
1964dbed73cbSSangeeta Misra 	for (server = rule->ir_servers, cnt = *num_servers;
1965dbed73cbSSangeeta Misra 	    server != NULL && cnt > 0;
1966dbed73cbSSangeeta Misra 	    server = server->iser_next, cnt--, servers++) {
1967dbed73cbSSangeeta Misra 		(void) memcpy(servers->name, server->iser_name,
1968dbed73cbSSangeeta Misra 		    ILB_SERVER_NAMESZ);
1969dbed73cbSSangeeta Misra 		servers->addr = server->iser_addr_v6;
1970dbed73cbSSangeeta Misra 		servers->min_port = htons(server->iser_min_port);
1971dbed73cbSSangeeta Misra 		servers->max_port = htons(server->iser_max_port);
1972dbed73cbSSangeeta Misra 		servers->flags = server->iser_enabled ? ILB_SERVER_ENABLED : 0;
1973dbed73cbSSangeeta Misra 		servers->err = 0;
1974dbed73cbSSangeeta Misra 	}
1975dbed73cbSSangeeta Misra 	ILB_RULE_REFRELE(rule);
1976dbed73cbSSangeeta Misra 	*num_servers -= cnt;
1977dbed73cbSSangeeta Misra 
1978dbed73cbSSangeeta Misra 	return (0);
1979dbed73cbSSangeeta Misra }
1980dbed73cbSSangeeta Misra 
1981dbed73cbSSangeeta Misra void
ilb_get_rulenames(ilb_stack_t * ilbs,zoneid_t zoneid,uint32_t * num_names,char * buf)1982dbed73cbSSangeeta Misra ilb_get_rulenames(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_names,
1983dbed73cbSSangeeta Misra     char *buf)
1984dbed73cbSSangeeta Misra {
1985dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
1986dbed73cbSSangeeta Misra 	int cnt;
1987dbed73cbSSangeeta Misra 
1988dbed73cbSSangeeta Misra 	if (*num_names == 0)
1989dbed73cbSSangeeta Misra 		return;
1990dbed73cbSSangeeta Misra 
1991dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
1992dbed73cbSSangeeta Misra 	for (cnt = 0, tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL;
1993dbed73cbSSangeeta Misra 	    tmp_rule = tmp_rule->ir_next) {
1994dbed73cbSSangeeta Misra 		if (tmp_rule->ir_zoneid != zoneid)
1995dbed73cbSSangeeta Misra 			continue;
1996dbed73cbSSangeeta Misra 
1997dbed73cbSSangeeta Misra 		(void) memcpy(buf, tmp_rule->ir_name, ILB_RULE_NAMESZ);
1998dbed73cbSSangeeta Misra 		buf += ILB_RULE_NAMESZ;
1999dbed73cbSSangeeta Misra 		if (++cnt == *num_names)
2000dbed73cbSSangeeta Misra 			break;
2001dbed73cbSSangeeta Misra 	}
2002dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
2003dbed73cbSSangeeta Misra 	*num_names = cnt;
2004dbed73cbSSangeeta Misra }
2005dbed73cbSSangeeta Misra 
2006dbed73cbSSangeeta Misra int
ilb_rule_list(ilb_stack_t * ilbs,zoneid_t zoneid,ilb_rule_cmd_t * cmd)2007dbed73cbSSangeeta Misra ilb_rule_list(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_rule_cmd_t *cmd)
2008dbed73cbSSangeeta Misra {
2009dbed73cbSSangeeta Misra 	ilb_rule_t *rule;
2010dbed73cbSSangeeta Misra 	int err;
2011dbed73cbSSangeeta Misra 
2012dbed73cbSSangeeta Misra 	if ((rule = ilb_find_rule(ilbs, zoneid, cmd->name, &err)) == NULL) {
2013dbed73cbSSangeeta Misra 		return (err);
2014dbed73cbSSangeeta Misra 	}
2015dbed73cbSSangeeta Misra 
2016dbed73cbSSangeeta Misra 	/*
2017dbed73cbSSangeeta Misra 	 * Except the enabled flags, none of the following will change
2018dbed73cbSSangeeta Misra 	 * in the life time of a rule.  So we don't hold the mutex when
2019dbed73cbSSangeeta Misra 	 * reading them.  The worst is to report a wrong enabled flags.
2020dbed73cbSSangeeta Misra 	 */
2021dbed73cbSSangeeta Misra 	cmd->ip_ver = rule->ir_ipver;
2022dbed73cbSSangeeta Misra 	cmd->proto = rule->ir_proto;
2023dbed73cbSSangeeta Misra 	cmd->min_port = htons(rule->ir_min_port);
2024dbed73cbSSangeeta Misra 	cmd->max_port = htons(rule->ir_max_port);
2025dbed73cbSSangeeta Misra 
2026dbed73cbSSangeeta Misra 	cmd->vip = rule->ir_target_v6;
2027dbed73cbSSangeeta Misra 	cmd->algo = rule->ir_alg_type;
2028dbed73cbSSangeeta Misra 	cmd->topo = rule->ir_topo;
2029dbed73cbSSangeeta Misra 
2030dbed73cbSSangeeta Misra 	cmd->nat_src_start = rule->ir_nat_src_start;
2031dbed73cbSSangeeta Misra 	cmd->nat_src_end = rule->ir_nat_src_end;
2032dbed73cbSSangeeta Misra 
2033dbed73cbSSangeeta Misra 	cmd->conn_drain_timeout = rule->ir_conn_drain_timeout;
2034dbed73cbSSangeeta Misra 	cmd->nat_expiry = rule->ir_nat_expiry;
2035dbed73cbSSangeeta Misra 	cmd->sticky_expiry = rule->ir_sticky_expiry;
2036dbed73cbSSangeeta Misra 
2037dbed73cbSSangeeta Misra 	cmd->flags = 0;
2038dbed73cbSSangeeta Misra 	if (rule->ir_flags & ILB_RULE_ENABLED)
2039dbed73cbSSangeeta Misra 		cmd->flags |= ILB_RULE_ENABLED;
2040dbed73cbSSangeeta Misra 	if (rule->ir_flags & ILB_RULE_STICKY) {
2041dbed73cbSSangeeta Misra 		cmd->flags |= ILB_RULE_STICKY;
2042dbed73cbSSangeeta Misra 		cmd->sticky_mask = rule->ir_sticky_mask;
2043dbed73cbSSangeeta Misra 	}
2044dbed73cbSSangeeta Misra 
2045dbed73cbSSangeeta Misra 	ILB_RULE_REFRELE(rule);
2046dbed73cbSSangeeta Misra 	return (0);
2047dbed73cbSSangeeta Misra }
2048dbed73cbSSangeeta Misra 
2049dbed73cbSSangeeta Misra static void *
ilb_stack_init(netstackid_t stackid,netstack_t * ns)2050dbed73cbSSangeeta Misra ilb_stack_init(netstackid_t stackid, netstack_t *ns)
2051dbed73cbSSangeeta Misra {
2052dbed73cbSSangeeta Misra 	ilb_stack_t *ilbs;
2053dbed73cbSSangeeta Misra 	char tq_name[TASKQ_NAMELEN];
2054dbed73cbSSangeeta Misra 
2055dbed73cbSSangeeta Misra 	ilbs = kmem_alloc(sizeof (ilb_stack_t), KM_SLEEP);
2056dbed73cbSSangeeta Misra 	ilbs->ilbs_netstack = ns;
2057dbed73cbSSangeeta Misra 
2058dbed73cbSSangeeta Misra 	ilbs->ilbs_rule_head = NULL;
2059dbed73cbSSangeeta Misra 	ilbs->ilbs_g_hash = NULL;
2060dbed73cbSSangeeta Misra 	mutex_init(&ilbs->ilbs_g_lock, NULL, MUTEX_DEFAULT, NULL);
2061dbed73cbSSangeeta Misra 
2062dbed73cbSSangeeta Misra 	ilbs->ilbs_kstat = kmem_alloc(sizeof (ilb_g_kstat_t), KM_SLEEP);
2063dbed73cbSSangeeta Misra 	if ((ilbs->ilbs_ksp = ilb_kstat_g_init(stackid, ilbs)) == NULL) {
2064dbed73cbSSangeeta Misra 		kmem_free(ilbs, sizeof (ilb_stack_t));
2065dbed73cbSSangeeta Misra 		return (NULL);
2066dbed73cbSSangeeta Misra 	}
2067dbed73cbSSangeeta Misra 
2068dbed73cbSSangeeta Misra 	/*
2069dbed73cbSSangeeta Misra 	 * ilbs_conn/sticky_hash related info is initialized in
2070dbed73cbSSangeeta Misra 	 * ilb_conn/sticky_hash_init().
2071dbed73cbSSangeeta Misra 	 */
2072dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_taskq = NULL;
2073dbed73cbSSangeeta Misra 	ilbs->ilbs_rule_hash_size = ilb_rule_hash_size;
2074dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_hash_size = ilb_conn_hash_size;
2075dbed73cbSSangeeta Misra 	ilbs->ilbs_c2s_conn_hash = NULL;
2076dbed73cbSSangeeta Misra 	ilbs->ilbs_s2c_conn_hash = NULL;
2077dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_timer_list = NULL;
2078dbed73cbSSangeeta Misra 
2079dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_hash = NULL;
2080dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_hash_size = ilb_sticky_hash_size;
2081dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_timer_list = NULL;
2082dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_taskq = NULL;
2083dbed73cbSSangeeta Misra 
2084dbed73cbSSangeeta Misra 	/* The allocation is done later when there is a rule using NAT mode. */
2085dbed73cbSSangeeta Misra 	ilbs->ilbs_nat_src = NULL;
2086dbed73cbSSangeeta Misra 	ilbs->ilbs_nat_src_hash_size = ilb_nat_src_hash_size;
2087dbed73cbSSangeeta Misra 	mutex_init(&ilbs->ilbs_nat_src_lock, NULL, MUTEX_DEFAULT, NULL);
2088dbed73cbSSangeeta Misra 	ilbs->ilbs_nat_src_tid = 0;
2089dbed73cbSSangeeta Misra 
2090dbed73cbSSangeeta Misra 	/* For listing the conn hash table */
2091dbed73cbSSangeeta Misra 	mutex_init(&ilbs->ilbs_conn_list_lock, NULL, MUTEX_DEFAULT, NULL);
2092dbed73cbSSangeeta Misra 	cv_init(&ilbs->ilbs_conn_list_cv, NULL, CV_DEFAULT, NULL);
2093dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_list_busy = B_FALSE;
2094dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_list_cur = 0;
2095dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_list_connp = NULL;
2096dbed73cbSSangeeta Misra 
2097dbed73cbSSangeeta Misra 	/* For listing the sticky hash table */
2098dbed73cbSSangeeta Misra 	mutex_init(&ilbs->ilbs_sticky_list_lock, NULL, MUTEX_DEFAULT, NULL);
2099dbed73cbSSangeeta Misra 	cv_init(&ilbs->ilbs_sticky_list_cv, NULL, CV_DEFAULT, NULL);
2100dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_list_busy = B_FALSE;
2101dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_list_cur = 0;
2102dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_list_curp = NULL;
2103dbed73cbSSangeeta Misra 
21046e0672acSSangeeta Misra 	(void) snprintf(tq_name, sizeof (tq_name), "ilb_rule_taskq_%p",
21056e0672acSSangeeta Misra 	    (void *)ns);
2106dbed73cbSSangeeta Misra 	ilbs->ilbs_rule_taskq = taskq_create(tq_name, ILB_RULE_TASKQ_NUM_THR,
2107dbed73cbSSangeeta Misra 	    minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
2108dbed73cbSSangeeta Misra 
2109dbed73cbSSangeeta Misra 	return (ilbs);
2110dbed73cbSSangeeta Misra }
2111dbed73cbSSangeeta Misra 
2112dbed73cbSSangeeta Misra /* ARGSUSED */
2113dbed73cbSSangeeta Misra static void
ilb_stack_shutdown(netstackid_t stackid,void * arg)2114dbed73cbSSangeeta Misra ilb_stack_shutdown(netstackid_t stackid, void *arg)
2115dbed73cbSSangeeta Misra {
2116dbed73cbSSangeeta Misra 	ilb_stack_t *ilbs = (ilb_stack_t *)arg;
2117dbed73cbSSangeeta Misra 	ilb_rule_t *tmp_rule;
2118dbed73cbSSangeeta Misra 
2119dbed73cbSSangeeta Misra 	ilb_sticky_hash_fini(ilbs);
2120dbed73cbSSangeeta Misra 	ilb_conn_hash_fini(ilbs);
2121dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_g_lock);
2122dbed73cbSSangeeta Misra 	while ((tmp_rule = ilbs->ilbs_rule_head) != NULL) {
2123dbed73cbSSangeeta Misra 		ilb_rule_hash_del(tmp_rule);
2124dbed73cbSSangeeta Misra 		ilb_rule_g_del(ilbs, tmp_rule);
2125dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_g_lock);
2126dbed73cbSSangeeta Misra 		ilb_rule_del_common(ilbs, tmp_rule);
2127dbed73cbSSangeeta Misra 		mutex_enter(&ilbs->ilbs_g_lock);
2128dbed73cbSSangeeta Misra 	}
2129dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_g_lock);
2130dbed73cbSSangeeta Misra 	if (ilbs->ilbs_nat_src != NULL)
2131dbed73cbSSangeeta Misra 		ilb_nat_src_fini(ilbs);
2132dbed73cbSSangeeta Misra }
2133dbed73cbSSangeeta Misra 
2134dbed73cbSSangeeta Misra static void
ilb_stack_fini(netstackid_t stackid,void * arg)2135dbed73cbSSangeeta Misra ilb_stack_fini(netstackid_t stackid, void * arg)
2136dbed73cbSSangeeta Misra {
2137dbed73cbSSangeeta Misra 	ilb_stack_t *ilbs = (ilb_stack_t *)arg;
2138dbed73cbSSangeeta Misra 
2139dbed73cbSSangeeta Misra 	ilb_rule_hash_fini(ilbs);
2140dbed73cbSSangeeta Misra 	taskq_destroy(ilbs->ilbs_rule_taskq);
2141dbed73cbSSangeeta Misra 	ilb_kstat_g_fini(stackid, ilbs);
2142dbed73cbSSangeeta Misra 	kmem_free(ilbs->ilbs_kstat, sizeof (ilb_g_kstat_t));
2143dbed73cbSSangeeta Misra 	kmem_free(ilbs, sizeof (ilb_stack_t));
2144dbed73cbSSangeeta Misra }
2145dbed73cbSSangeeta Misra 
2146dbed73cbSSangeeta Misra void
ilb_ddi_g_init(void)2147dbed73cbSSangeeta Misra ilb_ddi_g_init(void)
2148dbed73cbSSangeeta Misra {
2149dbed73cbSSangeeta Misra 	netstack_register(NS_ILB, ilb_stack_init, ilb_stack_shutdown,
2150dbed73cbSSangeeta Misra 	    ilb_stack_fini);
2151dbed73cbSSangeeta Misra }
2152dbed73cbSSangeeta Misra 
2153dbed73cbSSangeeta Misra void
ilb_ddi_g_destroy(void)2154dbed73cbSSangeeta Misra ilb_ddi_g_destroy(void)
2155dbed73cbSSangeeta Misra {
2156dbed73cbSSangeeta Misra 	netstack_unregister(NS_ILB);
2157dbed73cbSSangeeta Misra 	ilb_conn_cache_fini();
2158dbed73cbSSangeeta Misra 	ilb_sticky_cache_fini();
2159dbed73cbSSangeeta Misra }
2160