/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate 
2692a8e44dSDan OpenSolaris Anderson #define	ARCFOUR_LOOP_OPTIMIZED
277c478bd9Sstevel@tonic-gate 
28*8de5c4f4SDan OpenSolaris Anderson #ifndef _KERNEL
29*8de5c4f4SDan OpenSolaris Anderson #include <stdint.h>
30*8de5c4f4SDan OpenSolaris Anderson #endif	/* _KERNEL */
31*8de5c4f4SDan OpenSolaris Anderson 
327c478bd9Sstevel@tonic-gate #include "arcfour.h"
337c478bd9Sstevel@tonic-gate 
34160abee0Sda #if defined(__amd64)
3592a8e44dSDan OpenSolaris Anderson /* ARCFour_key.flag values */
3692a8e44dSDan OpenSolaris Anderson #define	ARCFOUR_ON_INTEL	1
3792a8e44dSDan OpenSolaris Anderson #define	ARCFOUR_ON_AMD64	0
3892a8e44dSDan OpenSolaris Anderson 
3955553f71Sda #ifdef _KERNEL
4055553f71Sda #include <sys/x86_archext.h>
4155553f71Sda #include <sys/cpuvar.h>
4255553f71Sda 
4355553f71Sda #else
4455553f71Sda #include <sys/auxv.h>
4555553f71Sda #endif	/* _KERNEL */
4655553f71Sda #endif	/* __amd64 */
47160abee0Sda 
4892a8e44dSDan OpenSolaris Anderson #ifndef __amd64
4992a8e44dSDan OpenSolaris Anderson /*
5092a8e44dSDan OpenSolaris Anderson  * Initialize the key stream 'key' using the key value.
5192a8e44dSDan OpenSolaris Anderson  *
5292a8e44dSDan OpenSolaris Anderson  * Input:
5392a8e44dSDan OpenSolaris Anderson  * keyval	User-provided key
5492a8e44dSDan OpenSolaris Anderson  * keyvallen	Length, in bytes, of keyval
5592a8e44dSDan OpenSolaris Anderson  * Output:
5692a8e44dSDan OpenSolaris Anderson  * key		Initialized ARCFOUR key schedule, based on keyval
5792a8e44dSDan OpenSolaris Anderson  */
587c478bd9Sstevel@tonic-gate void
arcfour_key_init(ARCFour_key * key,uchar_t * keyval,int keyvallen)597c478bd9Sstevel@tonic-gate arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
607c478bd9Sstevel@tonic-gate {
617c478bd9Sstevel@tonic-gate 	uchar_t ext_keyval[256];
627c478bd9Sstevel@tonic-gate 	uchar_t tmp;
637c478bd9Sstevel@tonic-gate 	int i, j;
647c478bd9Sstevel@tonic-gate 
6555553f71Sda 	/* Normalize key length to 256 */
667c478bd9Sstevel@tonic-gate 	for (i = j = 0; i < 256; i++, j++) {
677c478bd9Sstevel@tonic-gate 		if (j == keyvallen)
687c478bd9Sstevel@tonic-gate 			j = 0;
697c478bd9Sstevel@tonic-gate 		ext_keyval[i] = keyval[j];
707c478bd9Sstevel@tonic-gate 	}
7155553f71Sda 
727c478bd9Sstevel@tonic-gate 	for (i = 0; i < 256; i++)
737c478bd9Sstevel@tonic-gate 		key->arr[i] = (uchar_t)i;
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate 	j = 0;
767c478bd9Sstevel@tonic-gate 	for (i = 0; i < 256; i++) {
7792a8e44dSDan OpenSolaris Anderson 		j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
787c478bd9Sstevel@tonic-gate 		tmp = key->arr[i];
797c478bd9Sstevel@tonic-gate 		key->arr[i] = key->arr[j];
807c478bd9Sstevel@tonic-gate 		key->arr[j] = tmp;
817c478bd9Sstevel@tonic-gate 	}
827c478bd9Sstevel@tonic-gate 	key->i = 0;
837c478bd9Sstevel@tonic-gate 	key->j = 0;
847c478bd9Sstevel@tonic-gate }
8592a8e44dSDan OpenSolaris Anderson #endif	/* !__amd64 */
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate /*
89160abee0Sda  * Encipher 'in' using 'key'.
9092a8e44dSDan OpenSolaris Anderson  *
9192a8e44dSDan OpenSolaris Anderson  * Input:
9292a8e44dSDan OpenSolaris Anderson  * key		ARCFOUR key, initialized by arcfour_key_init()
9392a8e44dSDan OpenSolaris Anderson  * in		Input text
9492a8e44dSDan OpenSolaris Anderson  * out		Buffer to contain output text
9592a8e44dSDan OpenSolaris Anderson  * len		Length, in bytes, of the in and out buffers
9692a8e44dSDan OpenSolaris Anderson  *
9792a8e44dSDan OpenSolaris Anderson  * Output:
9892a8e44dSDan OpenSolaris Anderson  * out		Buffer containing output text
9992a8e44dSDan OpenSolaris Anderson  *
10092a8e44dSDan OpenSolaris Anderson  * Note: in and out can point to the same location
1017c478bd9Sstevel@tonic-gate  */
1027c478bd9Sstevel@tonic-gate void
arcfour_crypt(ARCFour_key * key,uchar_t * in,uchar_t * out,size_t len)1037c478bd9Sstevel@tonic-gate arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
1047c478bd9Sstevel@tonic-gate {
10592a8e44dSDan OpenSolaris Anderson #ifdef	__amd64
10692a8e44dSDan OpenSolaris Anderson 	if (key->flag == ARCFOUR_ON_AMD64) {
10792a8e44dSDan OpenSolaris Anderson 		arcfour_crypt_asm(key, in, out, len);
10892a8e44dSDan OpenSolaris Anderson 	} else { /* Intel EM64T */
10992a8e44dSDan OpenSolaris Anderson #endif	/* amd64 */
11092a8e44dSDan OpenSolaris Anderson 
11192a8e44dSDan OpenSolaris Anderson 	size_t		ii;
11292a8e44dSDan OpenSolaris Anderson 	uchar_t		i, j, ti, tj;
11392a8e44dSDan OpenSolaris Anderson #ifdef ARCFOUR_LOOP_OPTIMIZED
11492a8e44dSDan OpenSolaris Anderson 	uchar_t		arr_ij;
11592a8e44dSDan OpenSolaris Anderson #endif
11692a8e44dSDan OpenSolaris Anderson #ifdef __amd64
11792a8e44dSDan OpenSolaris Anderson 	uint32_t	*arr;
11892a8e44dSDan OpenSolaris Anderson #else
11992a8e44dSDan OpenSolaris Anderson 	uchar_t		*arr;
12092a8e44dSDan OpenSolaris Anderson #endif
1217c478bd9Sstevel@tonic-gate 
12292a8e44dSDan OpenSolaris Anderson #ifdef	sun4u
1237c478bd9Sstevel@tonic-gate 	/*
1247c478bd9Sstevel@tonic-gate 	 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
12592a8e44dSDan OpenSolaris Anderson 	 * the cases where the input and output buffers are aligned on
1267c478bd9Sstevel@tonic-gate 	 * a multiple of 8-byte boundary.
1277c478bd9Sstevel@tonic-gate 	 */
12892a8e44dSDan OpenSolaris Anderson 	int		index;
12992a8e44dSDan OpenSolaris Anderson 	uchar_t		tmp;
1307c478bd9Sstevel@tonic-gate 
1314cc1ac68Skrishna 	index = (((uint64_t)(uintptr_t)in) & 0x7);
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate 	/* Get the 'in' on an 8-byte alignment */
1347c478bd9Sstevel@tonic-gate 	if (index > 0) {
1357c478bd9Sstevel@tonic-gate 		i = key->i;
1367c478bd9Sstevel@tonic-gate 		j = key->j;
1374cc1ac68Skrishna 		for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
1384cc1ac68Skrishna 		    (index-- > 0) && len > 0;
1397c478bd9Sstevel@tonic-gate 		    len--, in++, out++) {
14092a8e44dSDan OpenSolaris Anderson 			++i;
1417c478bd9Sstevel@tonic-gate 			j = j + key->arr[i];
1427c478bd9Sstevel@tonic-gate 			tmp = key->arr[i];
1437c478bd9Sstevel@tonic-gate 			key->arr[i] = key->arr[j];
1447c478bd9Sstevel@tonic-gate 			key->arr[j] = tmp;
1457c478bd9Sstevel@tonic-gate 			tmp = key->arr[i] + key->arr[j];
1467c478bd9Sstevel@tonic-gate 			*out = *in ^ key->arr[tmp];
1477c478bd9Sstevel@tonic-gate 		}
1487c478bd9Sstevel@tonic-gate 		key->i = i;
1497c478bd9Sstevel@tonic-gate 		key->j = j;
1507c478bd9Sstevel@tonic-gate 	}
15192a8e44dSDan OpenSolaris Anderson 
1527c478bd9Sstevel@tonic-gate 	if (len == 0)
1537c478bd9Sstevel@tonic-gate 		return;
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate 	/* See if we're fortunate and 'out' got aligned as well */
1567c478bd9Sstevel@tonic-gate 
1574cc1ac68Skrishna 	if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
1587c478bd9Sstevel@tonic-gate #endif	/* sun4u */
15992a8e44dSDan OpenSolaris Anderson 
16092a8e44dSDan OpenSolaris Anderson 	i = key->i;
16192a8e44dSDan OpenSolaris Anderson 	j = key->j;
16292a8e44dSDan OpenSolaris Anderson 	arr = key->arr;
16392a8e44dSDan OpenSolaris Anderson 
16492a8e44dSDan OpenSolaris Anderson #ifndef ARCFOUR_LOOP_OPTIMIZED
16592a8e44dSDan OpenSolaris Anderson 	/*
16692a8e44dSDan OpenSolaris Anderson 	 * This loop is hasn't been reordered, but is kept for reference
16792a8e44dSDan OpenSolaris Anderson 	 * purposes as it's more readable
16892a8e44dSDan OpenSolaris Anderson 	 */
16992a8e44dSDan OpenSolaris Anderson 	for (ii = 0; ii < len; ++ii) {
17092a8e44dSDan OpenSolaris Anderson 		++i;
17192a8e44dSDan OpenSolaris Anderson 		ti = arr[i];
17292a8e44dSDan OpenSolaris Anderson 		j = j + ti;
17392a8e44dSDan OpenSolaris Anderson 		tj = arr[j];
17492a8e44dSDan OpenSolaris Anderson 		arr[j] = ti;
17592a8e44dSDan OpenSolaris Anderson 		arr[i] = tj;
17692a8e44dSDan OpenSolaris Anderson 		out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
17792a8e44dSDan OpenSolaris Anderson 	}
17892a8e44dSDan OpenSolaris Anderson 
17992a8e44dSDan OpenSolaris Anderson #else
18092a8e44dSDan OpenSolaris Anderson 	/*
18192a8e44dSDan OpenSolaris Anderson 	 * This for loop is optimized by carefully spreading out
18292a8e44dSDan OpenSolaris Anderson 	 * memory access and storage to avoid conflicts,
18392a8e44dSDan OpenSolaris Anderson 	 * allowing the processor to process operations in parallel
18492a8e44dSDan OpenSolaris Anderson 	 */
18592a8e44dSDan OpenSolaris Anderson 
18692a8e44dSDan OpenSolaris Anderson 	/* for loop setup */
18792a8e44dSDan OpenSolaris Anderson 	++i;
18892a8e44dSDan OpenSolaris Anderson 	ti = arr[i];
18992a8e44dSDan OpenSolaris Anderson 	j = j + ti;
19092a8e44dSDan OpenSolaris Anderson 	tj = arr[j];
19192a8e44dSDan OpenSolaris Anderson 	arr[j] = ti;
19292a8e44dSDan OpenSolaris Anderson 	arr[i] = tj;
19392a8e44dSDan OpenSolaris Anderson 	arr_ij = arr[(ti + tj) & 0xff];
19492a8e44dSDan OpenSolaris Anderson 	--len;
19592a8e44dSDan OpenSolaris Anderson 
19692a8e44dSDan OpenSolaris Anderson 	for (ii = 0; ii < len; ) {
19792a8e44dSDan OpenSolaris Anderson 		++i;
19892a8e44dSDan OpenSolaris Anderson 		ti = arr[i];
19992a8e44dSDan OpenSolaris Anderson 		j = j + ti;
20092a8e44dSDan OpenSolaris Anderson 		tj = arr[j];
20192a8e44dSDan OpenSolaris Anderson 		arr[j] = ti;
20292a8e44dSDan OpenSolaris Anderson 		arr[i] = tj;
20392a8e44dSDan OpenSolaris Anderson 
20492a8e44dSDan OpenSolaris Anderson 		/* save result from previous loop: */
20592a8e44dSDan OpenSolaris Anderson 		out[ii] = in[ii] ^ arr_ij;
20692a8e44dSDan OpenSolaris Anderson 
20792a8e44dSDan OpenSolaris Anderson 		++ii;
20892a8e44dSDan OpenSolaris Anderson 		arr_ij = arr[(ti + tj) & 0xff];
20992a8e44dSDan OpenSolaris Anderson 	}
21092a8e44dSDan OpenSolaris Anderson 	/* save result from last loop: */
21192a8e44dSDan OpenSolaris Anderson 	out[ii] = in[ii] ^ arr_ij;
21292a8e44dSDan OpenSolaris Anderson #endif
21392a8e44dSDan OpenSolaris Anderson 
21492a8e44dSDan OpenSolaris Anderson 	key->i = i;
21592a8e44dSDan OpenSolaris Anderson 	key->j = j;
21692a8e44dSDan OpenSolaris Anderson 
2177c478bd9Sstevel@tonic-gate #ifdef	sun4u
2187c478bd9Sstevel@tonic-gate 	} else {
2197c478bd9Sstevel@tonic-gate 		arcfour_crypt_aligned(key, len, in, out);
2207c478bd9Sstevel@tonic-gate 	}
2217c478bd9Sstevel@tonic-gate #endif	/* sun4u */
22292a8e44dSDan OpenSolaris Anderson #ifdef	__amd64
22392a8e44dSDan OpenSolaris Anderson 	}
22492a8e44dSDan OpenSolaris Anderson #endif	/* amd64 */
2257c478bd9Sstevel@tonic-gate }
22655553f71Sda 
22755553f71Sda 
22892a8e44dSDan OpenSolaris Anderson #ifdef	__amd64
22955553f71Sda /*
23055553f71Sda  * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
231*8de5c4f4SDan OpenSolaris Anderson  * Cache the result, as the CPU can't change.
232*8de5c4f4SDan OpenSolaris Anderson  *
233*8de5c4f4SDan OpenSolaris Anderson  * Note: the userland version uses getisax() and checks for an AMD-64-only
234*8de5c4f4SDan OpenSolaris Anderson  * feature.  The kernel version uses cpuid_getvendor().
23555553f71Sda  */
23655553f71Sda int
arcfour_crypt_on_intel(void)23755553f71Sda arcfour_crypt_on_intel(void)
23855553f71Sda {
239*8de5c4f4SDan OpenSolaris Anderson 	static int	cached_result = -1;
240*8de5c4f4SDan OpenSolaris Anderson 
241*8de5c4f4SDan OpenSolaris Anderson 	if (cached_result == -1) { /* first time */
24255553f71Sda #ifdef _KERNEL
243*8de5c4f4SDan OpenSolaris Anderson 		cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
24455553f71Sda #else
245*8de5c4f4SDan OpenSolaris Anderson 		uint_t	ui;
246*8de5c4f4SDan OpenSolaris Anderson 
247*8de5c4f4SDan OpenSolaris Anderson 		(void) getisax(&ui, 1);
248*8de5c4f4SDan OpenSolaris Anderson 		cached_result = ((ui & AV_386_AMD_MMX) == 0);
24955553f71Sda #endif	/* _KERNEL */
250*8de5c4f4SDan OpenSolaris Anderson 	}
251*8de5c4f4SDan OpenSolaris Anderson 
252*8de5c4f4SDan OpenSolaris Anderson 	return (cached_result);
25355553f71Sda }
25492a8e44dSDan OpenSolaris Anderson #endif	/* __amd64 */
255