144961713Sgirish /*
244961713Sgirish  * CDDL HEADER START
344961713Sgirish  *
444961713Sgirish  * The contents of this file are subject to the terms of the
544961713Sgirish  * Common Development and Distribution License (the "License").
644961713Sgirish  * You may not use this file except in compliance with the License.
744961713Sgirish  *
844961713Sgirish  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
944961713Sgirish  * or http://www.opensolaris.org/os/licensing.
1044961713Sgirish  * See the License for the specific language governing permissions
1144961713Sgirish  * and limitations under the License.
1244961713Sgirish  *
1344961713Sgirish  * When distributing Covered Code, include this CDDL HEADER in each
1444961713Sgirish  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1544961713Sgirish  * If applicable, add the following below this CDDL HEADER, with the
1644961713Sgirish  * fields enclosed by brackets "[]" replaced with your own identifying
1744961713Sgirish  * information: Portions Copyright [yyyy] [name of copyright owner]
1844961713Sgirish  *
1944961713Sgirish  * CDDL HEADER END
2044961713Sgirish  */
2144961713Sgirish /*
2252ccf843Smisaki  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
2344961713Sgirish  * Use is subject to license terms.
2444961713Sgirish  */
2544961713Sgirish 
2644961713Sgirish #include <sys/types.h>
2744961713Sgirish #include <nxge_fflp_hash.h>
2844961713Sgirish 
2944961713Sgirish static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len);
30a3c5bd6dSspeer 
3144961713Sgirish /*
3244961713Sgirish  * The crc32c algorithms are taken from sctp_crc32 implementation
3344961713Sgirish  * common/inet/sctp_crc32.{c,h}
3444961713Sgirish  *
3544961713Sgirish  */
3644961713Sgirish 
3744961713Sgirish /*
38a3c5bd6dSspeer  * Fast CRC32C calculation algorithm.  The basic idea is to look at it
3944961713Sgirish  * four bytes (one word) at a time, using four tables.  The
4044961713Sgirish  * standard algorithm in RFC 3309 uses one table.
4144961713Sgirish  */
4244961713Sgirish 
/*
 * SCTP uses the reflected (reversed) form of CRC-32 with the generating
 * polynomial 0x1EDC6F41.
 */
#define	SCTP_POLY 0x1EDC6F41L

/* CRC-CCITT generating polynomial. */
#define	CRC_CCITT_POLY 0x1021

/* The four CRC32c lookup tables; filled in by nxge_crc32c_init(). */
static uint32_t crc32c_tab[4][256];

/* The four CRC-CCITT lookup tables; filled in by nxge_crc_ccitt_init(). */
static uint16_t crc_ccitt_tab[4][256];

/* The four lookup tables for the H1 computation; see nxge_init_h1_table(). */
static uint32_t h1table[4][256];

/* Polynomial used to build the H1 tables (same value as SCTP_POLY). */
#define	CRC_32C_POLY 0x1EDC6F41L

/*
 * Fold one byte of data into crc using the first H1 table.  crc must be an
 * lvalue: it is updated in place and the new value is also the value of the
 * whole expression.  Both arguments are parenthesized so that compound
 * expressions (e.g. "a | b") group as the caller expects; note that crc is
 * still evaluated more than once.
 */
#define	COMPUTE_H1_BYTE(crc, data) \
	((crc) = ((crc) << 8) ^ h1table[0][(((crc) >> 24) ^ (data)) & 0xff])
6544961713Sgirish 
/*
 * Reverse the bit order of a 32-bit word: bit 0 of b becomes bit 31 of the
 * result, bit 1 becomes bit 30, and so on.
 */
static uint32_t
reflect_32(uint32_t b)
{
	int i;
	uint32_t rw = 0;

	for (i = 0; i < 32; i++) {
		if (b & 1) {
			/*
			 * The constant must be unsigned: shifting a signed
			 * 1 into bit 31 is undefined behaviour.
			 */
			rw |= (uint32_t)1 << (31 - i);
		}
		b >>= 1;
	}
	return (rw);
}
8044961713Sgirish 
/*
 * Reverse the order of the four bytes of a 32-bit word.
 */
static uint32_t
flip32(uint32_t w)
{
	uint32_t to_top = w << 24;
	uint32_t to_upper_mid = (w << 8) & 0xff0000;
	uint32_t to_lower_mid = (w >> 8) & 0xff00;
	uint32_t to_bottom = w >> 24;

	return (to_top | to_upper_mid | to_lower_mid | to_bottom);
}
8744961713Sgirish 
8844961713Sgirish /*
8944961713Sgirish  * reference crc-ccitt implementation
9044961713Sgirish  */
9144961713Sgirish 
9244961713Sgirish uint16_t
crc_ccitt(uint16_t crcin,uint8_t data)9344961713Sgirish crc_ccitt(uint16_t crcin, uint8_t data)
9444961713Sgirish {
9544961713Sgirish 	uint16_t mcrc, crc = 0, bits = 0;
9644961713Sgirish 
9744961713Sgirish 	mcrc = (((crcin >> 8) ^ data) & 0xff) << 8;
9844961713Sgirish 	for (bits = 0; bits < 8; bits++) {
9944961713Sgirish 		crc = ((crc ^ mcrc) & 0x8000) ?
10052ccf843Smisaki 		    (crc << 1) ^ CRC_CCITT_POLY :
10152ccf843Smisaki 		    crc << 1;
10244961713Sgirish 		mcrc <<= 1;
10344961713Sgirish 	}
10444961713Sgirish 	return ((crcin << 8) ^ crc);
10544961713Sgirish }
10644961713Sgirish 
10744961713Sgirish /*
10844961713Sgirish  * Initialize the crc32c tables.
10944961713Sgirish  */
11044961713Sgirish 
11144961713Sgirish void
nxge_crc32c_init(void)11244961713Sgirish nxge_crc32c_init(void)
11344961713Sgirish {
11444961713Sgirish 	uint32_t index, bit, byte, crc;
11544961713Sgirish 
11644961713Sgirish 	for (index = 0; index < 256; index++) {
11744961713Sgirish 		crc = reflect_32(index);
11844961713Sgirish 		for (byte = 0; byte < 4; byte++) {
11944961713Sgirish 			for (bit = 0; bit < 8; bit++) {
12044961713Sgirish 				crc = (crc & 0x80000000) ?
12152ccf843Smisaki 				    (crc << 1) ^ SCTP_POLY : crc << 1;
12244961713Sgirish 			}
12344961713Sgirish #ifdef _BIG_ENDIAN
12444961713Sgirish 			crc32c_tab[3 - byte][index] = flip32(reflect_32(crc));
12544961713Sgirish #else
12644961713Sgirish 			crc32c_tab[byte][index] = reflect_32(crc);
12744961713Sgirish #endif
12844961713Sgirish 		}
12944961713Sgirish 	}
13044961713Sgirish }
13144961713Sgirish 
13244961713Sgirish /*
13344961713Sgirish  * Initialize the crc-ccitt tables.
13444961713Sgirish  */
13544961713Sgirish 
13644961713Sgirish void
nxge_crc_ccitt_init(void)13744961713Sgirish nxge_crc_ccitt_init(void)
13844961713Sgirish {
13944961713Sgirish 	uint16_t crc;
14044961713Sgirish 	uint16_t index, bit, byte;
14144961713Sgirish 
14244961713Sgirish 	for (index = 0; index < 256; index++) {
14344961713Sgirish 		crc = index << 8;
14444961713Sgirish 		for (byte = 0; byte < 4; byte++) {
14544961713Sgirish 			for (bit = 0; bit < 8; bit++) {
14644961713Sgirish 				crc = (crc & 0x8000) ?
14752ccf843Smisaki 				    (crc << 1) ^ CRC_CCITT_POLY : crc << 1;
14844961713Sgirish 			}
14944961713Sgirish #ifdef _BIG_ENDIAN
15044961713Sgirish 			crc_ccitt_tab[3 - byte][index] = crc;
15144961713Sgirish #else
15244961713Sgirish 			crc_ccitt_tab[byte][index] = crc;
15344961713Sgirish #endif
15444961713Sgirish 		}
15544961713Sgirish 	}
15644961713Sgirish }
15744961713Sgirish 
15844961713Sgirish /*
15944961713Sgirish  * Lookup  the crc32c for a byte stream
16044961713Sgirish  */
16144961713Sgirish 
16244961713Sgirish static void
nxge_crc32c_byte(uint32_t * crcptr,const uint8_t * buf,int len)16344961713Sgirish nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len)
16444961713Sgirish {
16544961713Sgirish 	uint32_t crc;
16644961713Sgirish 	int i;
16744961713Sgirish 
16844961713Sgirish 	crc = *crcptr;
16944961713Sgirish 	for (i = 0; i < len; i++) {
17044961713Sgirish #ifdef _BIG_ENDIAN
17144961713Sgirish 		crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)];
17244961713Sgirish #else
17344961713Sgirish 		crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)];
17444961713Sgirish #endif
17544961713Sgirish 	}
17644961713Sgirish 	*crcptr = crc;
17744961713Sgirish }
17844961713Sgirish 
17944961713Sgirish /*
18044961713Sgirish  * Lookup  the crc-ccitt for a byte stream
18144961713Sgirish  */
18244961713Sgirish 
18344961713Sgirish static void
nxge_crc_ccitt_byte(uint16_t * crcptr,const uint8_t * buf,int len)18444961713Sgirish nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len)
18544961713Sgirish {
18644961713Sgirish 	uint16_t crc;
18744961713Sgirish 	int i;
18844961713Sgirish 
18944961713Sgirish 	crc = *crcptr;
19044961713Sgirish 	for (i = 0; i < len; i++) {
19144961713Sgirish 
19244961713Sgirish #ifdef _BIG_ENDIAN
19344961713Sgirish 		crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)];
19444961713Sgirish #else
19544961713Sgirish 		crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)];
19644961713Sgirish #endif
19744961713Sgirish 	}
19844961713Sgirish 	*crcptr = crc;
19944961713Sgirish }
20044961713Sgirish 
20144961713Sgirish /*
20244961713Sgirish  * Lookup  the crc32c for a 32 bit word stream
20344961713Sgirish  * Lookup is done fro the 4 bytes in parallel
20444961713Sgirish  * from the tables computed earlier
20544961713Sgirish  *
20644961713Sgirish  */
20744961713Sgirish 
20844961713Sgirish static void
nxge_crc32c_word(uint32_t * crcptr,const uint32_t * buf,int len)20944961713Sgirish nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len)
21044961713Sgirish {
21144961713Sgirish 	uint32_t w, crc;
21244961713Sgirish 	int i;
21344961713Sgirish 
21444961713Sgirish 	crc = *crcptr;
21544961713Sgirish 	for (i = 0; i < len; i++) {
21644961713Sgirish 		w = crc ^ buf[i];
217a3c5bd6dSspeer 		crc = crc32c_tab[0][w >> 24] ^
21852ccf843Smisaki 		    crc32c_tab[1][(w >> 16) & 0xff] ^
21952ccf843Smisaki 		    crc32c_tab[2][(w >> 8) & 0xff] ^
22052ccf843Smisaki 		    crc32c_tab[3][w & 0xff];
22144961713Sgirish 	}
22244961713Sgirish 	*crcptr = crc;
22344961713Sgirish }
22444961713Sgirish 
/*
 * Look up the crc-ccitt for a stream of bytes.
 *
 * Since the parallel lookup version doesn't work yet, the byte stream
 * version is used (the crc is looked up one byte at a time).
 *
 * Returns crc16 updated with the len bytes at buf.
 */

uint16_t
nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len)
{
	uint16_t result = crc16;

	nxge_crc_ccitt_byte(&result, buf, len);
	return (result);
}
24044961713Sgirish 
/*
 * Look up the crc32c for a stream of bytes.
 *
 * Tries to look up the CRC on 4-byte words.  If the buffer is not 4-byte
 * aligned, first compute with byte lookups until aligned, then compute the
 * crc for each 4 bytes.  If there are bytes left at the end of the buffer,
 * perform a byte lookup for the remaining bytes.
 *
 * Returns crc32 updated with the len bytes at buf.  A zero or negative len
 * returns crc32 unchanged.
 */

uint32_t
nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len)
{
	int rem;

	/*
	 * Nothing to fold in.  This also keeps a negative len from reaching
	 * the tail pass below, where "len & 3" would be non-zero and the
	 * lookup would start before buf.
	 */
	if (len <= 0) {
		return (crc32);
	}

	/* Number of leading bytes before the next 4-byte boundary (0-3). */
	rem = (int)((4 - ((uintptr_t)buf & 3)) & 3);
	if (rem != 0) {
		if (len < rem) {
			rem = len;
		}
		nxge_crc32c_byte(&crc32, buf, rem);
		buf = buf + rem;
		len = len - rem;
	}

	/* Aligned middle section, one 32-bit word at a time. */
	if (len > 3) {
		nxge_crc32c_word(&crc32, (const uint32_t *)buf, len / 4);
	}

	/* Trailing bytes that do not fill a whole word. */
	rem = len & 3;
	if (rem != 0) {
		nxge_crc32c_byte(&crc32, buf + len - rem, rem);
	}
	return (crc32);
}
27644961713Sgirish 
27744961713Sgirish void
nxge_init_h1_table()27844961713Sgirish nxge_init_h1_table()
27944961713Sgirish {
28044961713Sgirish 	uint32_t crc, bit, byte, index;
28144961713Sgirish 
282a3c5bd6dSspeer 	for (index = 0; index < 256; index++) {
28344961713Sgirish 		crc = index << 24;
28444961713Sgirish 		for (byte = 0; byte < 4; byte++) {
28544961713Sgirish 			for (bit = 0; bit < 8; bit++) {
286a3c5bd6dSspeer 				crc = ((crc & 0x80000000)) ?
28752ccf843Smisaki 				    (crc << 1) ^ CRC_32C_POLY : crc << 1;
28844961713Sgirish 			}
28944961713Sgirish 			h1table[byte][index] = crc;
29044961713Sgirish 		}
29144961713Sgirish 	}
29244961713Sgirish }
29344961713Sgirish 
29444961713Sgirish /*
29544961713Sgirish  * Reference Neptune H1 computation function
29644961713Sgirish  *
29744961713Sgirish  * It is a slightly modified implementation of
29844961713Sgirish  * CRC-32C implementation
29944961713Sgirish  */
30044961713Sgirish 
30144961713Sgirish uint32_t
nxge_compute_h1_serial(uint32_t init_value,uint32_t * flow,uint32_t len)302a3c5bd6dSspeer nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len)
30344961713Sgirish {
30444961713Sgirish 	int bit, byte;
30544961713Sgirish 	uint32_t crc_h1 = init_value;
30644961713Sgirish 	uint8_t *buf;
307a3c5bd6dSspeer 
30844961713Sgirish 	buf = (uint8_t *)flow;
30944961713Sgirish 	for (byte = 0; byte < len; byte++) {
31044961713Sgirish 		for (bit = 0; bit < 8; bit++) {
31144961713Sgirish 			crc_h1 = (((crc_h1 >> 24) & 0x80) ^
31252ccf843Smisaki 			    ((buf[byte] << bit) & 0x80)) ?
31352ccf843Smisaki 			    (crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1;
31444961713Sgirish 		}
31544961713Sgirish 	}
31644961713Sgirish 
31744961713Sgirish 	return (crc_h1);
31844961713Sgirish }
31944961713Sgirish 
32044961713Sgirish /*
32144961713Sgirish  * table based implementation
32244961713Sgirish  * uses 4 four tables in parallel
32344961713Sgirish  * 1 for each byte of a 32 bit word
32444961713Sgirish  *
32544961713Sgirish  * This is the default h1 computing function
32644961713Sgirish  *
32744961713Sgirish  */
32844961713Sgirish 
32944961713Sgirish uint32_t
nxge_compute_h1_table4(uint32_t crcin,uint32_t * flow,uint32_t length)330a3c5bd6dSspeer nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length)
33144961713Sgirish {
33244961713Sgirish 	uint32_t w, fw, i, crch1 = crcin;
33344961713Sgirish 	uint32_t *buf;
334a3c5bd6dSspeer 
33544961713Sgirish 	buf = (uint32_t *)flow;
33644961713Sgirish 
33744961713Sgirish 	for (i = 0; i < length / 4; i++) {
33844961713Sgirish #ifdef _BIG_ENDIAN
33944961713Sgirish 		fw = buf[i];
34044961713Sgirish #else
34144961713Sgirish 		fw = flip32(buf[i]);
34244961713Sgirish 		fw = buf[i];
34344961713Sgirish #endif
34444961713Sgirish 		w = crch1 ^ fw;
34544961713Sgirish 		crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^
34652ccf843Smisaki 		    h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff];
34744961713Sgirish 	}
34844961713Sgirish 	return (crch1);
34944961713Sgirish }
35044961713Sgirish 
/*
 * Table based implementation.
 * Uses a single table and computes h1 for a byte at a time.
 *
 * crcin is the starting value, flow the data to hash and length its size
 * in bytes.  Returns the updated value; when length is 0 the starting
 * value is returned unchanged, matching nxge_compute_h1_serial() and
 * nxge_compute_h1_table4().
 */

uint32_t
nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length)
{
	uint32_t i, crch1 = crcin;
	const uint8_t *buf;

	buf = (const uint8_t *)flow;

	for (i = 0; i < length; i++) {
		/* COMPUTE_H1_BYTE updates crch1 in place. */
		COMPUTE_H1_BYTE(crch1, buf[i]);
	}

	return (crch1);
}
376