1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#define	ARCFOUR_LOOP_OPTIMIZED
27
28#ifndef _KERNEL
29#include <stdint.h>
30#endif	/* _KERNEL */
31
32#include "arcfour.h"
33
34#if defined(__amd64)
35/* ARCFour_key.flag values */
36#define	ARCFOUR_ON_INTEL	1
37#define	ARCFOUR_ON_AMD64	0
38
39#ifdef _KERNEL
40#include <sys/x86_archext.h>
41#include <sys/cpuvar.h>
42
43#else
44#include <sys/auxv.h>
45#endif	/* _KERNEL */
46#endif	/* __amd64 */
47
#ifndef __amd64
/*
 * Initialize the key stream 'key' using the key value.
 *
 * Input:
 * keyval	User-provided key
 * keyvallen	Length, in bytes, of keyval.  At most the first 256 bytes
 *		are used; a shorter key is repeated cyclically until 256
 *		key bytes have been consumed.  If keyvallen is not
 *		positive, keyval is never dereferenced and the schedule is
 *		built as if every key byte were zero.
 * Output:
 * key		Initialized ARCFOUR key schedule, based on keyval
 */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{
	uchar_t keybyte, tmp;
	int i, j, k;

	/* Start from the identity permutation */
	for (i = 0; i < 256; i++)
		key->arr[i] = (uchar_t)i;

	/*
	 * Mix the key into the permutation.  Key bytes are read straight
	 * from keyval through the wrapping index 'k' rather than from an
	 * expanded 256-byte copy, so no extra copy of the key material is
	 * left behind on the stack.
	 */
	j = 0;
	k = 0;
	for (i = 0; i < 256; i++) {
		if (keyvallen > 0) {
			keybyte = keyval[k];
			if (++k == keyvallen)
				k = 0;
		} else {
			/* Empty key: never read through keyval */
			keybyte = 0;
		}

		j = (j + key->arr[i] + keybyte) & 0xff;

		/* Swap arr[i] and arr[j] */
		tmp = key->arr[i];
		key->arr[i] = key->arr[j];
		key->arr[j] = tmp;
	}

	/* The key stream generator starts at position (0, 0) */
	key->i = 0;
	key->j = 0;
}
#endif	/* !__amd64 */
86
87
88/*
89 * Encipher 'in' using 'key'.
90 *
91 * Input:
92 * key		ARCFOUR key, initialized by arcfour_key_init()
93 * in		Input text
94 * out		Buffer to contain output text
95 * len		Length, in bytes, of the in and out buffers
96 *
97 * Output:
98 * out		Buffer containing output text
99 *
100 * Note: in and out can point to the same location
101 */
102void
103arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
104{
105#ifdef	__amd64
106	if (key->flag == ARCFOUR_ON_AMD64) {
107		arcfour_crypt_asm(key, in, out, len);
108	} else { /* Intel EM64T */
109#endif	/* amd64 */
110
111	size_t		ii;
112	uchar_t		i, j, ti, tj;
113#ifdef ARCFOUR_LOOP_OPTIMIZED
114	uchar_t		arr_ij;
115#endif
116#ifdef __amd64
117	uint32_t	*arr;
118#else
119	uchar_t		*arr;
120#endif
121
122#ifdef	sun4u
123	/*
124	 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
125	 * the cases where the input and output buffers are aligned on
126	 * a multiple of 8-byte boundary.
127	 */
128	int		index;
129	uchar_t		tmp;
130
131	index = (((uint64_t)(uintptr_t)in) & 0x7);
132
133	/* Get the 'in' on an 8-byte alignment */
134	if (index > 0) {
135		i = key->i;
136		j = key->j;
137		for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
138		    (index-- > 0) && len > 0;
139		    len--, in++, out++) {
140			++i;
141			j = j + key->arr[i];
142			tmp = key->arr[i];
143			key->arr[i] = key->arr[j];
144			key->arr[j] = tmp;
145			tmp = key->arr[i] + key->arr[j];
146			*out = *in ^ key->arr[tmp];
147		}
148		key->i = i;
149		key->j = j;
150	}
151
152	if (len == 0)
153		return;
154
155	/* See if we're fortunate and 'out' got aligned as well */
156
157	if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
158#endif	/* sun4u */
159
160	i = key->i;
161	j = key->j;
162	arr = key->arr;
163
164#ifndef ARCFOUR_LOOP_OPTIMIZED
165	/*
166	 * This loop is hasn't been reordered, but is kept for reference
167	 * purposes as it's more readable
168	 */
169	for (ii = 0; ii < len; ++ii) {
170		++i;
171		ti = arr[i];
172		j = j + ti;
173		tj = arr[j];
174		arr[j] = ti;
175		arr[i] = tj;
176		out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
177	}
178
179#else
180	/*
181	 * This for loop is optimized by carefully spreading out
182	 * memory access and storage to avoid conflicts,
183	 * allowing the processor to process operations in parallel
184	 */
185
186	/* for loop setup */
187	++i;
188	ti = arr[i];
189	j = j + ti;
190	tj = arr[j];
191	arr[j] = ti;
192	arr[i] = tj;
193	arr_ij = arr[(ti + tj) & 0xff];
194	--len;
195
196	for (ii = 0; ii < len; ) {
197		++i;
198		ti = arr[i];
199		j = j + ti;
200		tj = arr[j];
201		arr[j] = ti;
202		arr[i] = tj;
203
204		/* save result from previous loop: */
205		out[ii] = in[ii] ^ arr_ij;
206
207		++ii;
208		arr_ij = arr[(ti + tj) & 0xff];
209	}
210	/* save result from last loop: */
211	out[ii] = in[ii] ^ arr_ij;
212#endif
213
214	key->i = i;
215	key->j = j;
216
217#ifdef	sun4u
218	} else {
219		arcfour_crypt_aligned(key, len, in, out);
220	}
221#endif	/* sun4u */
222#ifdef	__amd64
223	}
224#endif	/* amd64 */
225}
226
227
228#ifdef	__amd64
229/*
230 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
231 * Cache the result, as the CPU can't change.
232 *
233 * Note: the userland version uses getisax() and checks for an AMD-64-only
234 * feature.  The kernel version uses cpuid_getvendor().
235 */
236int
237arcfour_crypt_on_intel(void)
238{
239	static int	cached_result = -1;
240
241	if (cached_result == -1) { /* first time */
242#ifdef _KERNEL
243		cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
244#else
245		uint_t	ui;
246
247		(void) getisax(&ui, 1);
248		cached_result = ((ui & AV_386_AMD_MMX) == 0);
249#endif	/* _KERNEL */
250	}
251
252	return (cached_result);
253}
254#endif	/* __amd64 */
255