1/*
2 * Copyright (c) 2008-2016 Solarflare Communications Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 *    this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 *    this list of conditions and the following disclaimer in the documentation
12 *    and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * The views and conclusions contained in the software and documentation are
27 * those of the authors and should not be interpreted as representing official
28 * policies, either expressed or implied, of the FreeBSD Project.
29 */
30
31#include <sys/param.h>
32#include <sys/int_limits.h>
33#include <sys/byteorder.h>
34#include <sys/random.h>
35#include <sys/types.h>
36#include <sys/kmem.h>
37#include <netinet/in.h>
38#include "sfxge.h"
39#include "efx.h"
40
41/*
42 * The largest amount of the data which the hash may be calculated over
43 * is a 4-tuple of source/destination IPv6 addresses (2 x 16 bytes)
44 * and source/destination TCP port numbers (2 x 2 bytes), adding up to 40 bytes
45 */
46#define	SFXGE_TOEPLITZ_IN_MAX \
47	(2 * (sizeof (struct in6_addr) + sizeof (in_port_t)))
48#define	SFXGE_TOEPLITZ_CACHE_SIZE (SFXGE_TOEPLITZ_IN_MAX * (UINT8_MAX + 1))
49
50static uint32_t
51toeplitz_hash(const uint32_t *cache, const uint8_t *input,
52    unsigned pos, unsigned datalen)
53{
54	uint32_t hash = 0;
55	for (; datalen != 0; datalen--, pos++, input++) {
56		hash ^= cache[pos * (UINT8_MAX + 1) + *input];
57	}
58
59	return (hash);
60}
61
62uint32_t
63sfxge_toeplitz_hash(sfxge_t *sp, unsigned int addr_size,
64    uint8_t *src_addr, uint16_t src_port, uint8_t *dst_addr, uint16_t dst_port)
65{
66	uint32_t hash = 0;
67	unsigned pos = 0;
68
69	hash ^= toeplitz_hash(sp->s_toeplitz_cache, src_addr, pos, addr_size);
70	pos += addr_size;
71	hash ^= toeplitz_hash(sp->s_toeplitz_cache, dst_addr, pos, addr_size);
72	pos += addr_size;
73	if (src_port != 0 || dst_port != 0) {
74		hash ^= toeplitz_hash(sp->s_toeplitz_cache,
75		    (const uint8_t *)&src_port, pos, sizeof (src_port));
76		pos += sizeof (src_port);
77		hash ^= toeplitz_hash(sp->s_toeplitz_cache,
78		    (const uint8_t *)&dst_port, pos, sizeof (dst_port));
79	}
80	return (hash);
81}
82
83/*
84 * The algorithm to calculate RSS Toeplitz hash is essentially as follows:
85 * - Regard a Toeplitz key and an input as bit strings, with the
86 * most significant bit of the first byte being the first bit
87 * - Let's have a 32-bit window sliding over the Toeplitz key bit by bit
88 * - Let the initial value of the hash be zero
89 * - Then for every bit in the input that is set to 1, XOR the value of the
90 *   window at a given bit position into the resulting hash
91 *
92 * First we note that since XOR is commutative and associative, the
93 * resulting hash is just a XOR of subhashes for every input bit:
94 *        H = H_0 XOR H_1 XOR ... XOR H_n               (1)
95 * Then we note that every H_i is only dependent on the value of i and
96 * the value of i'th bit of input, but not on any preceding or following
97 * input bits.
98 * Then we note that (1) holds also for any bit sequences,
99 * e.g. for bytes of input:
100 *       H = H_0_7 XOR H_8_15 XOR ... XOR H_(n-7)_n     (2)
101 * and every
102 *       H_i_j = H_i XOR H_(i+1) ... XOR H_j.           (3)
103 *
104 * It naturally follows than H_i_(i+7) only depends on the value of the byte
105 * and the position of the byte in the input.
106 * Therefore we may pre-calculate the value of each byte sub-hash H_i_(i+7)
107 * for each possible byte value and each possible byte input position, and
108 * then just assemble the hash of the packet byte-by-byte instead of
109 * bit-by-bit.
110 *
111 * The amount of memory required for such a cache is not prohibitive:
112 * - we have at most 36 bytes of input, each holding 256 possible values
113 * - and the hash is 32-bit wide
114 * - hence, we need only 36 * 256 * 4 = 36kBytes of cache.
115 *
116 * The performance gain, at least on synthetic benchmarks, is significant:
117 * cache lookup is about 15 times faster than direct hash calculation
118 */
119const uint32_t *
120toeplitz_cache_init(const uint8_t *key)
121{
122	uint32_t *cache = kmem_alloc(SFXGE_TOEPLITZ_CACHE_SIZE *
123	    sizeof (uint32_t), KM_SLEEP);
124	unsigned i;
125
126	for (i = 0; i < SFXGE_TOEPLITZ_IN_MAX; i++, key++) {
127		uint32_t key_bits[NBBY] = { 0 };
128		unsigned j;
129		unsigned mask;
130		unsigned byte;
131
132#if defined(BE_IN32)
133		key_bits[0] = BE_IN32(key);
134#else
135		key_bits[0] = BE_32(*(uint32_t *)key);
136#endif
137		for (j = 1, mask = 1 << (NBBY - 1); j < NBBY; j++, mask >>= 1) {
138			key_bits[j] = key_bits[j - 1] << 1;
139			if ((key[sizeof (uint32_t)] & mask) != 0)
140				key_bits[j] |= 1;
141		}
142
143		for (byte = 0; byte <= UINT8_MAX; byte++) {
144			uint32_t res = 0;
145			for (j = 0, mask = 1 << (NBBY - 1);
146			    j < NBBY;
147			    j++, mask >>= 1) {
148				if (byte & mask)
149					res ^= key_bits[j];
150			}
151			cache[i * (UINT8_MAX + 1) + byte] = res;
152		}
153	}
154	return (cache);
155}
156
157
158int
159sfxge_toeplitz_hash_init(sfxge_t *sp)
160{
161	int rc;
162	uint8_t toeplitz_key[SFXGE_TOEPLITZ_KEY_LEN];
163
164	(void) random_get_pseudo_bytes(toeplitz_key, sizeof (toeplitz_key));
165
166	if ((rc = efx_rx_scale_mode_set(sp->s_enp, EFX_RX_HASHALG_TOEPLITZ,
167	    (1 << EFX_RX_HASH_IPV4) | (1 << EFX_RX_HASH_TCPIPV4) |
168	    (1 << EFX_RX_HASH_IPV6) | (1 << EFX_RX_HASH_TCPIPV6), B_TRUE)) != 0)
169		return (rc);
170
171	if ((rc = efx_rx_scale_key_set(sp->s_enp, toeplitz_key,
172	    sizeof (toeplitz_key))) != 0)
173		return (rc);
174
175	sp->s_toeplitz_cache = toeplitz_cache_init(toeplitz_key);
176
177	return (0);
178}
179
180void
181sfxge_toeplitz_hash_fini(sfxge_t *sp)
182{
183	if (sp->s_toeplitz_cache != NULL) {
184		kmem_free((void *)sp->s_toeplitz_cache,
185		    SFXGE_TOEPLITZ_CACHE_SIZE);
186		sp->s_toeplitz_cache = NULL;
187	}
188}
189