1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/cmn_err.h>
29 #include <netinet/in.h>
30 #include <inet/ip.h>
31 #include <inet/ip6.h>
32 #include <sys/crc32.h>
33 
34 #include <inet/ilb.h>
35 #include "ilb_impl.h"
36 #include "ilb_alg.h"
37 
/*
 * Hashing helpers.  Each macro stores into "hash" (a uint32_t lvalue) the
 * CRC32 of a 32-bit key, reduced modulo "size".  "size" must be non-zero.
 *
 *   HASH_IP_*       key is the address itself ("addr" must be an lvalue)
 *   HASH_IP_PORT_*  key is the address mixed with the port, the port being
 *                   folded into both 16-bit halves of the key
 *   HASH_IP_VIP_*   key is the source address XORed with the destination
 *
 * The *_V6 variants take in6_addr_t pointers and use only the last 32-bit
 * word (s6_addr32[3]) of each address.
 *
 * The bodies are wrapped in do { } while (0) so that each macro expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 */
#define	HASH_IP_V4(hash, addr, size)					\
do {									\
	CRC32((hash), &(addr), sizeof (in_addr_t), -1U, crc32_table);	\
	(hash) %= (size);						\
} while (0)
#define	HASH_IP_V6(hash, addr, size)					\
	HASH_IP_V4((hash), (addr)->s6_addr32[3], (size))

/*
 * The port is widened to uint32_t before shifting: a 16-bit port is
 * otherwise promoted to (signed) int, and shifting a value >= 0x8000 left
 * by 16 would overflow it, which is undefined behaviour.
 */
#define	HASH_IP_PORT_V4(hash, addr, port, size)				\
do {									\
	uint32_t hash_key_ = (uint32_t)(addr) ^				\
	    ((uint32_t)(port) << 16) ^ (uint32_t)(port);		\
	CRC32((hash), &hash_key_, sizeof (uint32_t), -1U, crc32_table);	\
	(hash) %= (size);						\
} while (0)
#define	HASH_IP_PORT_V6(hash, addr, port, size)				\
	HASH_IP_PORT_V4((hash), (addr)->s6_addr32[3], (port), (size))

#define	HASH_IP_VIP_V4(hash, saddr, daddr, size)			\
do {									\
	uint32_t hash_key_ = (uint32_t)(saddr) ^ (uint32_t)(daddr);	\
	CRC32((hash), &hash_key_, sizeof (uint32_t), -1U, crc32_table);	\
	(hash) %= (size);						\
} while (0)
#define	HASH_IP_VIP_V6(hash, saddr, daddr, size)			\
	HASH_IP_VIP_V4((hash), (saddr)->s6_addr32[3],			\
	    (daddr)->s6_addr32[3], (size))
64 
65 #define	INIT_HASH_TBL_SIZE	10
66 
67 typedef struct {
68 	ilb_server_t	*server;
69 	boolean_t	enabled;
70 } hash_server_t;
71 
72 /*
73  * There are two hash tables.  The hash_tbl holds all servers, both enabled
74  * and disabled.  The hash_enabled_tbl only holds enabled servers.  Having
75  * two tables allows the hash on a client request remains the same even when
76  * some servers are disabled.  If a server is disabled and a client's request
77  * hashes to it, we will do another hash.  This time the has is on the enabled
78  * server table.
79  */
80 typedef struct hash_s {
81 	kmutex_t	hash_lock;
82 	size_t		hash_servers;		/* Total # of servers */
83 	size_t		hash_tbl_size;		/* All server table size */
84 	size_t		hash_enabled_servers;	/* # of enabled servers */
85 	size_t		hash_enabled_tbl_size;	/* Enabled server table size */
86 	hash_server_t	*hash_tbl;
87 	hash_server_t	*hash_enabled_tbl;
88 	ilb_algo_impl_t	hash_type;
89 } hash_t;
90 
91 static void hash_fini(ilb_alg_data_t **);
92 
93 /* ARGSUSED */
94 static boolean_t
hash_lb(in6_addr_t * saddr,in_port_t sport,in6_addr_t * daddr,in_port_t dport,void * alg_data,ilb_server_t ** ret_server)95 hash_lb(in6_addr_t *saddr, in_port_t sport, in6_addr_t *daddr,
96     in_port_t dport, void *alg_data, ilb_server_t **ret_server)
97 {
98 	hash_t *hash_alg = (hash_t *)alg_data;
99 	uint32_t i;
100 
101 	ASSERT(ret_server != NULL);
102 	*ret_server = NULL;
103 
104 	mutex_enter(&hash_alg->hash_lock);
105 
106 	if (hash_alg->hash_servers == 0) {
107 		mutex_exit(&hash_alg->hash_lock);
108 		return (B_FALSE);
109 	}
110 
111 	switch (hash_alg->hash_type) {
112 	case ILB_ALG_IMPL_HASH_IP:
113 		HASH_IP_V6(i, saddr, hash_alg->hash_servers);
114 		break;
115 	case ILB_ALG_IMPL_HASH_IP_SPORT:
116 		HASH_IP_PORT_V6(i, saddr, sport, hash_alg->hash_servers);
117 		break;
118 	case ILB_ALG_IMPL_HASH_IP_VIP:
119 		HASH_IP_VIP_V6(i, saddr, daddr, hash_alg->hash_servers);
120 		break;
121 	default:
122 		mutex_exit(&hash_alg->hash_lock);
123 		return (B_FALSE);
124 	}
125 	if (hash_alg->hash_tbl[i].enabled) {
126 		*ret_server = hash_alg->hash_tbl[i].server;
127 		mutex_exit(&hash_alg->hash_lock);
128 		return (B_TRUE);
129 	}
130 
131 	if (hash_alg->hash_enabled_servers == 0) {
132 		mutex_exit(&hash_alg->hash_lock);
133 		return (B_FALSE);
134 	}
135 
136 	switch (hash_alg->hash_type) {
137 	case ILB_ALG_IMPL_HASH_IP:
138 		HASH_IP_V6(i, saddr, hash_alg->hash_enabled_servers);
139 		break;
140 	case ILB_ALG_IMPL_HASH_IP_SPORT:
141 		HASH_IP_PORT_V6(i, saddr, sport,
142 		    hash_alg->hash_enabled_servers);
143 		break;
144 	case ILB_ALG_IMPL_HASH_IP_VIP:
145 		HASH_IP_VIP_V6(i, saddr, daddr,
146 		    hash_alg->hash_enabled_servers);
147 		break;
148 	default:
149 		ASSERT(0);
150 		break;
151 	}
152 	*ret_server = hash_alg->hash_enabled_tbl[i].server;
153 	mutex_exit(&hash_alg->hash_lock);
154 	return (B_TRUE);
155 }
156 
157 static boolean_t
del_server(hash_server_t * tbl,size_t hash_size,ilb_server_t * host)158 del_server(hash_server_t *tbl, size_t hash_size, ilb_server_t *host)
159 {
160 	size_t i, j;
161 
162 	for (i = 0; i < hash_size; i++) {
163 		if (tbl[i].server == host) {
164 			if (i == hash_size - 1)
165 				break;
166 			for (j = i; j < hash_size - 1; j++)
167 				tbl[j] = tbl[j + 1];
168 			break;
169 		}
170 	}
171 	/* Not found... */
172 	if (i == hash_size)
173 		return (B_FALSE);
174 	tbl[hash_size - 1].server = NULL;
175 	tbl[hash_size - 1].enabled = B_FALSE;
176 	return (B_TRUE);
177 }
178 
179 static int
hash_server_del(ilb_server_t * host,void * alg_data)180 hash_server_del(ilb_server_t *host, void *alg_data)
181 {
182 	hash_t *hash_alg = (hash_t *)alg_data;
183 	boolean_t ret;
184 
185 	mutex_enter(&hash_alg->hash_lock);
186 
187 	ret = del_server(hash_alg->hash_tbl, hash_alg->hash_servers, host);
188 	if (!ret) {
189 		mutex_exit(&hash_alg->hash_lock);
190 		return (EINVAL);
191 	}
192 	hash_alg->hash_servers--;
193 
194 	/* The server may not be enabled. */
195 	ret = del_server(hash_alg->hash_enabled_tbl,
196 	    hash_alg->hash_enabled_servers, host);
197 	if (ret)
198 		hash_alg->hash_enabled_servers--;
199 
200 	mutex_exit(&hash_alg->hash_lock);
201 	ILB_SERVER_REFRELE(host);
202 	return (0);
203 }
204 
205 static int
grow_tbl(hash_server_t ** hash_tbl,size_t * tbl_size)206 grow_tbl(hash_server_t **hash_tbl, size_t *tbl_size)
207 {
208 	size_t mem_size;
209 	hash_server_t *new_tbl;
210 
211 	if ((new_tbl = kmem_zalloc(sizeof (hash_server_t) *
212 	    (*tbl_size + INIT_HASH_TBL_SIZE), KM_NOSLEEP)) == NULL) {
213 		return (ENOMEM);
214 	}
215 	mem_size = *tbl_size * sizeof (hash_server_t);
216 	bcopy(*hash_tbl, new_tbl, mem_size);
217 	kmem_free(*hash_tbl, mem_size);
218 	*hash_tbl = new_tbl;
219 	*tbl_size += INIT_HASH_TBL_SIZE;
220 	return (0);
221 }
222 
223 static int
hash_server_add(ilb_server_t * host,void * alg_data)224 hash_server_add(ilb_server_t *host, void *alg_data)
225 {
226 	hash_t *hash_alg = (hash_t *)alg_data;
227 	size_t new_size;
228 
229 	mutex_enter(&hash_alg->hash_lock);
230 
231 	/* First add the server to the hash_tbl. */
232 	new_size = hash_alg->hash_servers + 1;
233 	if (new_size > hash_alg->hash_tbl_size) {
234 		if (grow_tbl(&hash_alg->hash_tbl, &hash_alg->hash_tbl_size) !=
235 		    0) {
236 			mutex_exit(&hash_alg->hash_lock);
237 			return (ENOMEM);
238 		}
239 	}
240 
241 	hash_alg->hash_tbl[hash_alg->hash_servers].server = host;
242 	hash_alg->hash_tbl[hash_alg->hash_servers].enabled = host->iser_enabled;
243 	hash_alg->hash_servers++;
244 
245 	if (!host->iser_enabled) {
246 		mutex_exit(&hash_alg->hash_lock);
247 		ILB_SERVER_REFHOLD(host);
248 		return (0);
249 	}
250 
251 	/* If the server is enabled, add it to the hasn_enabled_tbl. */
252 	new_size = hash_alg->hash_enabled_servers + 1;
253 	if (new_size > hash_alg->hash_enabled_tbl_size) {
254 		if (grow_tbl(&hash_alg->hash_enabled_tbl,
255 		    &hash_alg->hash_enabled_tbl_size) != 0) {
256 			mutex_exit(&hash_alg->hash_lock);
257 			return (ENOMEM);
258 		}
259 	}
260 	hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].server =
261 	    host;
262 	hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].enabled =
263 	    B_TRUE;
264 	hash_alg->hash_enabled_servers++;
265 
266 	mutex_exit(&hash_alg->hash_lock);
267 	ILB_SERVER_REFHOLD(host);
268 	return (0);
269 }
270 
271 static int
hash_server_enable(ilb_server_t * host,void * alg_data)272 hash_server_enable(ilb_server_t *host, void *alg_data)
273 {
274 	hash_t *alg = (hash_t *)alg_data;
275 	size_t new_size, i;
276 
277 	mutex_enter(&alg->hash_lock);
278 
279 	for (i = 0; i < alg->hash_servers; i++) {
280 		if (alg->hash_tbl[i].server == host) {
281 			if (alg->hash_tbl[i].enabled) {
282 				mutex_exit(&alg->hash_lock);
283 				return (0);
284 			} else {
285 				break;
286 			}
287 		}
288 	}
289 	if (i == alg->hash_servers) {
290 		mutex_exit(&alg->hash_lock);
291 		return (EINVAL);
292 	}
293 
294 #if DEBUG
295 	/* The server should not be in the enabled tabled. */
296 	{
297 		size_t j;
298 
299 		for (j = 0; j < alg->hash_enabled_servers; j++) {
300 			if (alg->hash_enabled_tbl[j].server == host) {
301 				cmn_err(CE_PANIC, "Corrupted ILB enabled hash "
302 				    "table");
303 			}
304 		}
305 	}
306 #endif
307 
308 	new_size = alg->hash_enabled_servers + 1;
309 	if (new_size > alg->hash_enabled_tbl_size) {
310 		if (grow_tbl(&alg->hash_enabled_tbl,
311 		    &alg->hash_enabled_tbl_size) != 0) {
312 			mutex_exit(&alg->hash_lock);
313 			return (ENOMEM);
314 		}
315 	}
316 	alg->hash_tbl[i].enabled = B_TRUE;
317 	alg->hash_enabled_tbl[alg->hash_enabled_servers].server = host;
318 	alg->hash_enabled_tbl[alg->hash_enabled_servers].enabled = B_TRUE;
319 	alg->hash_enabled_servers++;
320 
321 	mutex_exit(&alg->hash_lock);
322 	return (0);
323 }
324 
325 static int
hash_server_disable(ilb_server_t * host,void * alg_data)326 hash_server_disable(ilb_server_t *host, void *alg_data)
327 {
328 	hash_t *alg = (hash_t *)alg_data;
329 	size_t i;
330 
331 	mutex_enter(&alg->hash_lock);
332 
333 	for (i = 0; i < alg->hash_servers; i++) {
334 		if (alg->hash_tbl[i].server == host) {
335 			if (!alg->hash_tbl[i].enabled) {
336 				mutex_exit(&alg->hash_lock);
337 				return (0);
338 			} else {
339 				break;
340 			}
341 		}
342 	}
343 	if (i == alg->hash_servers) {
344 		mutex_exit(&alg->hash_lock);
345 		return (EINVAL);
346 	}
347 
348 	alg->hash_tbl[i].enabled = B_FALSE;
349 #if DEBUG
350 	ASSERT(del_server(alg->hash_enabled_tbl, alg->hash_enabled_servers,
351 	    host));
352 #else
353 	(void) del_server(alg->hash_enabled_tbl, alg->hash_enabled_servers,
354 	    host);
355 #endif
356 	alg->hash_enabled_servers--;
357 
358 	mutex_exit(&alg->hash_lock);
359 	return (0);
360 }
361 
362 /* ARGSUSED */
363 ilb_alg_data_t *
ilb_alg_hash_init(ilb_rule_t * rule,const void * arg)364 ilb_alg_hash_init(ilb_rule_t *rule, const void *arg)
365 {
366 	ilb_alg_data_t	*alg;
367 	hash_t		*hash_alg;
368 	int		flags = *(int *)arg;
369 
370 	if ((alg = kmem_alloc(sizeof (ilb_alg_data_t), KM_NOSLEEP)) == NULL)
371 		return (NULL);
372 	if ((hash_alg = kmem_alloc(sizeof (hash_t), KM_NOSLEEP)) == NULL) {
373 		kmem_free(alg, sizeof (ilb_alg_data_t));
374 		return (NULL);
375 	}
376 	alg->ilb_alg_lb = hash_lb;
377 	alg->ilb_alg_server_del = hash_server_del;
378 	alg->ilb_alg_server_add = hash_server_add;
379 	alg->ilb_alg_server_enable = hash_server_enable;
380 	alg->ilb_alg_server_disable = hash_server_disable;
381 	alg->ilb_alg_fini = hash_fini;
382 	alg->ilb_alg_data = hash_alg;
383 
384 	mutex_init(&hash_alg->hash_lock, NULL, MUTEX_DEFAULT, NULL);
385 	hash_alg->hash_type = flags;
386 
387 	/* Table of all servers */
388 	hash_alg->hash_servers = 0;
389 	hash_alg->hash_tbl_size = INIT_HASH_TBL_SIZE;
390 	hash_alg->hash_tbl = kmem_zalloc(sizeof (hash_server_t) *
391 	    INIT_HASH_TBL_SIZE, KM_NOSLEEP);
392 	if (hash_alg->hash_tbl == NULL) {
393 		kmem_free(hash_alg, sizeof (hash_t));
394 		kmem_free(alg, sizeof (ilb_alg_data_t));
395 		return (NULL);
396 	}
397 
398 	/* Table of only enabled servers */
399 	hash_alg->hash_enabled_servers = 0;
400 	hash_alg->hash_enabled_tbl_size = INIT_HASH_TBL_SIZE;
401 	hash_alg->hash_enabled_tbl = kmem_zalloc(sizeof (hash_server_t) *
402 	    INIT_HASH_TBL_SIZE, KM_NOSLEEP);
403 	if (hash_alg->hash_tbl == NULL) {
404 		kmem_free(hash_alg->hash_tbl, INIT_HASH_TBL_SIZE *
405 		    sizeof (ilb_server_t *));
406 		kmem_free(hash_alg, sizeof (hash_t));
407 		kmem_free(alg, sizeof (ilb_alg_data_t));
408 		return (NULL);
409 	}
410 
411 	return (alg);
412 }
413 
414 static void
hash_fini(ilb_alg_data_t ** alg)415 hash_fini(ilb_alg_data_t **alg)
416 {
417 	hash_t		*hash_alg;
418 	int		i;
419 
420 	hash_alg = (*alg)->ilb_alg_data;
421 	for (i = 0; i < hash_alg->hash_servers; i++)
422 		ILB_SERVER_REFRELE(hash_alg->hash_tbl[i].server);
423 
424 	kmem_free(hash_alg->hash_tbl, sizeof (hash_server_t) *
425 	    hash_alg->hash_tbl_size);
426 	kmem_free(hash_alg->hash_enabled_tbl, sizeof (hash_server_t) *
427 	    hash_alg->hash_enabled_tbl_size);
428 	kmem_free(hash_alg, sizeof (hash_t));
429 	kmem_free(*alg, sizeof (ilb_alg_data_t));
430 	*alg = NULL;
431 }
432