1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #define	ARCFOUR_LOOP_OPTIMIZED
27 
28 #ifndef _KERNEL
29 #include <stdint.h>
30 #endif	/* _KERNEL */
31 
32 #include "arcfour.h"
33 
34 #if defined(__amd64)
35 /* ARCFour_key.flag values */
36 #define	ARCFOUR_ON_INTEL	1
37 #define	ARCFOUR_ON_AMD64	0
38 
39 #ifdef _KERNEL
40 #include <sys/x86_archext.h>
41 #include <sys/cpuvar.h>
42 
43 #else
44 #include <sys/auxv.h>
45 #endif	/* _KERNEL */
46 #endif	/* __amd64 */
47 
48 #ifndef __amd64
49 /*
50  * Initialize the key stream 'key' using the key value.
51  *
52  * Input:
53  * keyval	User-provided key
54  * keyvallen	Length, in bytes, of keyval
55  * Output:
56  * key		Initialized ARCFOUR key schedule, based on keyval
57  */
58 void
arcfour_key_init(ARCFour_key * key,uchar_t * keyval,int keyvallen)59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
60 {
61 	uchar_t ext_keyval[256];
62 	uchar_t tmp;
63 	int i, j;
64 
65 	/* Normalize key length to 256 */
66 	for (i = j = 0; i < 256; i++, j++) {
67 		if (j == keyvallen)
68 			j = 0;
69 		ext_keyval[i] = keyval[j];
70 	}
71 
72 	for (i = 0; i < 256; i++)
73 		key->arr[i] = (uchar_t)i;
74 
75 	j = 0;
76 	for (i = 0; i < 256; i++) {
77 		j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
78 		tmp = key->arr[i];
79 		key->arr[i] = key->arr[j];
80 		key->arr[j] = tmp;
81 	}
82 	key->i = 0;
83 	key->j = 0;
84 }
85 #endif	/* !__amd64 */
86 
87 
88 /*
89  * Encipher 'in' using 'key'.
90  *
91  * Input:
92  * key		ARCFOUR key, initialized by arcfour_key_init()
93  * in		Input text
94  * out		Buffer to contain output text
95  * len		Length, in bytes, of the in and out buffers
96  *
97  * Output:
98  * out		Buffer containing output text
99  *
100  * Note: in and out can point to the same location
101  */
102 void
arcfour_crypt(ARCFour_key * key,uchar_t * in,uchar_t * out,size_t len)103 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
104 {
105 #ifdef	__amd64
106 	if (key->flag == ARCFOUR_ON_AMD64) {
107 		arcfour_crypt_asm(key, in, out, len);
108 	} else { /* Intel EM64T */
109 #endif	/* amd64 */
110 
111 	size_t		ii;
112 	uchar_t		i, j, ti, tj;
113 #ifdef ARCFOUR_LOOP_OPTIMIZED
114 	uchar_t		arr_ij;
115 #endif
116 #ifdef __amd64
117 	uint32_t	*arr;
118 #else
119 	uchar_t		*arr;
120 #endif
121 
122 #ifdef	sun4u
123 	/*
124 	 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
125 	 * the cases where the input and output buffers are aligned on
126 	 * a multiple of 8-byte boundary.
127 	 */
128 	int		index;
129 	uchar_t		tmp;
130 
131 	index = (((uint64_t)(uintptr_t)in) & 0x7);
132 
133 	/* Get the 'in' on an 8-byte alignment */
134 	if (index > 0) {
135 		i = key->i;
136 		j = key->j;
137 		for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
138 		    (index-- > 0) && len > 0;
139 		    len--, in++, out++) {
140 			++i;
141 			j = j + key->arr[i];
142 			tmp = key->arr[i];
143 			key->arr[i] = key->arr[j];
144 			key->arr[j] = tmp;
145 			tmp = key->arr[i] + key->arr[j];
146 			*out = *in ^ key->arr[tmp];
147 		}
148 		key->i = i;
149 		key->j = j;
150 	}
151 
152 	if (len == 0)
153 		return;
154 
155 	/* See if we're fortunate and 'out' got aligned as well */
156 
157 	if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
158 #endif	/* sun4u */
159 
160 	i = key->i;
161 	j = key->j;
162 	arr = key->arr;
163 
164 #ifndef ARCFOUR_LOOP_OPTIMIZED
165 	/*
166 	 * This loop is hasn't been reordered, but is kept for reference
167 	 * purposes as it's more readable
168 	 */
169 	for (ii = 0; ii < len; ++ii) {
170 		++i;
171 		ti = arr[i];
172 		j = j + ti;
173 		tj = arr[j];
174 		arr[j] = ti;
175 		arr[i] = tj;
176 		out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
177 	}
178 
179 #else
180 	/*
181 	 * This for loop is optimized by carefully spreading out
182 	 * memory access and storage to avoid conflicts,
183 	 * allowing the processor to process operations in parallel
184 	 */
185 
186 	/* for loop setup */
187 	++i;
188 	ti = arr[i];
189 	j = j + ti;
190 	tj = arr[j];
191 	arr[j] = ti;
192 	arr[i] = tj;
193 	arr_ij = arr[(ti + tj) & 0xff];
194 	--len;
195 
196 	for (ii = 0; ii < len; ) {
197 		++i;
198 		ti = arr[i];
199 		j = j + ti;
200 		tj = arr[j];
201 		arr[j] = ti;
202 		arr[i] = tj;
203 
204 		/* save result from previous loop: */
205 		out[ii] = in[ii] ^ arr_ij;
206 
207 		++ii;
208 		arr_ij = arr[(ti + tj) & 0xff];
209 	}
210 	/* save result from last loop: */
211 	out[ii] = in[ii] ^ arr_ij;
212 #endif
213 
214 	key->i = i;
215 	key->j = j;
216 
217 #ifdef	sun4u
218 	} else {
219 		arcfour_crypt_aligned(key, len, in, out);
220 	}
221 #endif	/* sun4u */
222 #ifdef	__amd64
223 	}
224 #endif	/* amd64 */
225 }
226 
227 
228 #ifdef	__amd64
229 /*
230  * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
231  * Cache the result, as the CPU can't change.
232  *
233  * Note: the userland version uses getisax() and checks for an AMD-64-only
234  * feature.  The kernel version uses cpuid_getvendor().
235  */
236 int
arcfour_crypt_on_intel(void)237 arcfour_crypt_on_intel(void)
238 {
239 	static int	cached_result = -1;
240 
241 	if (cached_result == -1) { /* first time */
242 #ifdef _KERNEL
243 		cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
244 #else
245 		uint_t	ui;
246 
247 		(void) getisax(&ui, 1);
248 		cached_result = ((ui & AV_386_AMD_MMX) == 0);
249 #endif	/* _KERNEL */
250 	}
251 
252 	return (cached_result);
253 }
254 #endif	/* __amd64 */
255