/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2013 Saso Kiselkov.  All rights reserved.
 */

/*
 * The basic framework for this code came from the reference
 * implementation for MD5.  That implementation is Copyright (C)
 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
 *
 * License to copy and use this software is granted provided that it
 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 * Algorithm" in all material mentioning or referencing this software
 * or this function.
 *
 * License is also granted to make and use derivative works provided
 * that such works are identified as "derived from the RSA Data
 * Security, Inc. MD5 Message-Digest Algorithm" in all material
 * mentioning or referencing the derived work.
 *
 * RSA Data Security, Inc. makes no representations concerning either
 * the merchantability of this software or the suitability of this
 * software for any particular purpose. It is provided "as is"
 * without express or implied warranty of any kind.
 *
 * These notices must be retained in any copies of any part of this
 * documentation and/or software.
 *
 * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
 * standard, available at
 * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
 * Not as fast as one would like -- further optimizations are encouraged
 * and appreciated.
 */

#ifndef _KERNEL
#include <stdint.h>
#include <strings.h>
#include <stdlib.h>
#include <errno.h>
#endif /* _KERNEL */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#define	_SHA2_IMPL
#include <sys/sha2.h>
#include <sys/sha2_consts.h>

#ifdef _KERNEL
#include <sys/cmn_err.h>

#else
#pragma weak SHA256Update = SHA2Update
#pragma weak SHA384Update = SHA2Update
#pragma weak SHA512Update = SHA2Update

#pragma weak SHA256Final = SHA2Final
#pragma weak SHA384Final = SHA2Final
#pragma weak SHA512Final = SHA2Final

#endif	/* _KERNEL */

#ifdef _LITTLE_ENDIAN
#include <sys/byteorder.h>
#define	HAVE_HTONL
#endif

static void Encode(uint8_t *, uint32_t *, size_t);
static void Encode64(uint8_t *, uint64_t *, size_t);

#if	defined(__amd64)
#define	SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
#define	SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)

void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);

#else
static void SHA256Transform(SHA2_CTX *, const uint8_t *);
static void SHA512Transform(SHA2_CTX *, const uint8_t *);
#endif	/* __amd64 */

static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
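
/*
 * PADDING is consumed by SHA2Final(): a single 0x80 byte marks the end of
 * the message, followed by as many zero bytes as are needed to leave room
 * for the big-endian bit count that SHA2Final() appends in the last block.
 */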

/* Ch and Maj are the basic SHA2 functions. */
#define	Ch(b, c, d)	(((b) & (c)) ^ ((~b) & (d)))
#define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))

/* Rotates x right n bits. */
#define	ROTR(x, n)	\
	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))

/* Shift x right n bits */
#define	SHR(x, n)	((x) >> (n))

/* SHA256 Functions */
#define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))

#define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
	T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w;	\
	d += T1;							\
	T2 = BIGSIGMA0_256(a) + Maj(a, b, c);				\
	h = T1 + T2

/* SHA384/512 Functions */
#define	BIGSIGMA0(x)	(ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#define	BIGSIGMA1(x)	(ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#define	SIGMA0(x)	(ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
#define	SIGMA1(x)	(ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
#define	SHA512ROUND(a, b, c, d, e, f, g, h, i, w)			\
	T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w;	\
	d += T1;							\
	T2 = BIGSIGMA0(a) + Maj(a, b, c);				\
	h = T1 + T2
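
/*
 * Message schedule expansion (FIPS 180-2): for rounds t >= 16,
 * W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16].
 * The unrolled transforms below keep the schedule in the sixteen
 * variables w0..w15, so the recurrence appears as, for example,
 * w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0.
 */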

/*
 * sparc optimization:
 *
 * on the sparc, we can load big endian 32-bit data easily.  note that
 * special care must be taken to ensure the address is 32-bit aligned.
 * in the interest of speed, we don't check to make sure, since
 * careful programming can guarantee this for us.
 */

#if	defined(_BIG_ENDIAN)
#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
#define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))

#elif	defined(HAVE_HTONL)
#define	LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
#define	LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))

#else
/* little endian -- will work on big endian, but slowly */
#define	LOAD_BIG_32(addr)	\
	(((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
#define	LOAD_BIG_64(addr)	\
	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |	\
	    ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) |	\
	    ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) |	\
	    ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
#endif	/* _BIG_ENDIAN */


#if	!defined(__amd64)
/* SHA256 Transform */

static void
SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	uint32_t a = ctx->state.s32[0];
	uint32_t b = ctx->state.s32[1];
	uint32_t c = ctx->state.s32[2];
	uint32_t d = ctx->state.s32[3];
	uint32_t e = ctx->state.s32[4];
	uint32_t f = ctx->state.s32[5];
	uint32_t g = ctx->state.s32[6];
	uint32_t h = ctx->state.s32[7];

	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint32_t T1, T2;

#if	defined(__sparc)
	static const uint32_t sha256_consts[] = {
		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
		SHA256_CONST_63
	};
#endif	/* __sparc */

	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
		blk = (uint8_t *)ctx->buf_un.buf32;
	}

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_32(blk + 4 * 0);
	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_32(blk + 4 * 1);
	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_32(blk + 4 * 2);
	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_32(blk + 4 * 3);
	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_32(blk + 4 * 4);
	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_32(blk + 4 * 5);
	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_32(blk + 4 * 6);
	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_32(blk + 4 * 7);
	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_32(blk + 4 * 8);
	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_32(blk + 4 * 9);
	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_32(blk + 4 * 10);
	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_32(blk + 4 * 11);
	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_32(blk + 4 * 12);
	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_32(blk + 4 * 13);
	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_32(blk + 4 * 14);
	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_32(blk + 4 * 15);
	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

	ctx->state.s32[0] += a;
	ctx->state.s32[1] += b;
	ctx->state.s32[2] += c;
	ctx->state.s32[3] += d;
	ctx->state.s32[4] += e;
	ctx->state.s32[5] += f;
	ctx->state.s32[6] += g;
	ctx->state.s32[7] += h;
}


/* SHA384 and SHA512 Transform */

static void
SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
{

	uint64_t a = ctx->state.s64[0];
	uint64_t b = ctx->state.s64[1];
	uint64_t c = ctx->state.s64[2];
	uint64_t d = ctx->state.s64[3];
	uint64_t e = ctx->state.s64[4];
	uint64_t f = ctx->state.s64[5];
	uint64_t g = ctx->state.s64[6];
	uint64_t h = ctx->state.s64[7];

	uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint64_t T1, T2;

#if	defined(__sparc)
	static const uint64_t sha512_consts[] = {
		SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
		SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
		SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
		SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
		SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
		SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
		SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
		SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
		SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
		SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
		SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
		SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
		SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
		SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
		SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
		SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
		SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
		SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
		SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
		SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
		SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
		SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
		SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
		SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
		SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
		SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
		SHA512_CONST_78, SHA512_CONST_79
	};
#endif	/* __sparc */


	if ((uintptr_t)blk & 0x7) {		/* not 8-byte aligned? */
		bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
		blk = (uint8_t *)ctx->buf_un.buf64;
	}

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_64(blk + 8 * 0);
	SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_64(blk + 8 * 1);
	SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_64(blk + 8 * 2);
	SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_64(blk + 8 * 3);
	SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_64(blk + 8 * 4);
	SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_64(blk + 8 * 5);
	SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_64(blk + 8 * 6);
	SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_64(blk + 8 * 7);
	SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_64(blk + 8 * 8);
	SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_64(blk + 8 * 9);
	SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_64(blk + 8 * 10);
	SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_64(blk + 8 * 11);
	SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_64(blk + 8 * 12);
	SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_64(blk + 8 * 13);
	SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_64(blk + 8 * 14);
	SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_64(blk + 8 * 15);
	SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);

	ctx->state.s64[0] += a;
	ctx->state.s64[1] += b;
	ctx->state.s64[2] += c;
	ctx->state.s64[3] += d;
	ctx->state.s64[4] += e;
	ctx->state.s64[5] += f;
	ctx->state.s64[6] += g;
	ctx->state.s64[7] += h;

}
#endif	/* !__amd64 */


/*
 * Encode()
 *
 * purpose: to convert a list of numbers from host byte order to big endian
 *   input: uint8_t *	: place to store the converted big endian numbers
 *	    uint32_t *	: place to get numbers to convert from
 *          size_t	: the length of the input in bytes
 *  output: void
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t		i, j;

#if	defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
		for (i = 0, j = 0; j < len; i++, j += 4) {
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((uint32_t *)(output + j)) = input[i];
		}
	} else {
#endif	/* little endian -- will work on big endian, but slowly */
		for (i = 0, j = 0; j < len; i++, j += 4) {
			output[j]	= (input[i] >> 24) & 0xff;
			output[j + 1]	= (input[i] >> 16) & 0xff;
			output[j + 2]	= (input[i] >>  8) & 0xff;
			output[j + 3]	= input[i] & 0xff;
		}
#if	defined(__sparc)
	}
#endif
}

static void
Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t		i, j;

#if	defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
		for (i = 0, j = 0; j < len; i++, j += 8) {
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((uint64_t *)(output + j)) = input[i];
		}
	} else {
#endif	/* little endian -- will work on big endian, but slowly */
		for (i = 0, j = 0; j < len; i++, j += 8) {

			output[j]	= (input[i] >> 56) & 0xff;
			output[j + 1]	= (input[i] >> 48) & 0xff;
			output[j + 2]	= (input[i] >> 40) & 0xff;
			output[j + 3]	= (input[i] >> 32) & 0xff;
			output[j + 4]	= (input[i] >> 24) & 0xff;
			output[j + 5]	= (input[i] >> 16) & 0xff;
			output[j + 6]	= (input[i] >>  8) & 0xff;
			output[j + 7]	= input[i] & 0xff;
		}
#if	defined(__sparc)
	}
#endif
}


void
SHA2Init(uint64_t mech, SHA2_CTX *ctx)
{

	switch (mech) {
	case SHA256_MECH_INFO_TYPE:
	case SHA256_HMAC_MECH_INFO_TYPE:
	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
		ctx->state.s32[0] = 0x6a09e667U;
		ctx->state.s32[1] = 0xbb67ae85U;
		ctx->state.s32[2] = 0x3c6ef372U;
		ctx->state.s32[3] = 0xa54ff53aU;
		ctx->state.s32[4] = 0x510e527fU;
		ctx->state.s32[5] = 0x9b05688cU;
		ctx->state.s32[6] = 0x1f83d9abU;
		ctx->state.s32[7] = 0x5be0cd19U;
		break;
	case SHA384_MECH_INFO_TYPE:
	case SHA384_HMAC_MECH_INFO_TYPE:
	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
		ctx->state.s64[1] = 0x629a292a367cd507ULL;
		ctx->state.s64[2] = 0x9159015a3070dd17ULL;
		ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
		ctx->state.s64[4] = 0x67332667ffc00b31ULL;
		ctx->state.s64[5] = 0x8eb44a8768581511ULL;
		ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
		ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
		break;
	case SHA512_MECH_INFO_TYPE:
	case SHA512_HMAC_MECH_INFO_TYPE:
	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
		ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
		ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
		ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
		ctx->state.s64[4] = 0x510e527fade682d1ULL;
		ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
		ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
		ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
		break;
	case SHA512_224_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
		ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
		ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
		ctx->state.s64[3] = 0x679DD514582F9FCFULL;
		ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
		ctx->state.s64[5] = 0x77E36F7304C48942ULL;
		ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
		ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
		break;
	case SHA512_256_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0x22312194FC2BF72CULL;
		ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
		ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
		ctx->state.s64[3] = 0x963877195940EABDULL;
		ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
		ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
		ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
		ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
		break;
#ifdef _KERNEL
	default:
		cmn_err(CE_PANIC,
		    "sha2_init: failed to find a supported algorithm: 0x%x",
		    (uint32_t)mech);

#endif /* _KERNEL */
	}

	ctx->algotype = (uint32_t)mech;
	ctx->count.c64[0] = ctx->count.c64[1] = 0;
}

#ifndef _KERNEL

#pragma inline(SHA256Init, SHA384Init, SHA512Init)
void
SHA256Init(SHA256_CTX *ctx)
{
	SHA2Init(SHA256, ctx);
}

void
SHA384Init(SHA384_CTX *ctx)
{
	SHA2Init(SHA384, ctx);
}

void
SHA512Init(SHA512_CTX *ctx)
{
	SHA2Init(SHA512, ctx);
}

#endif /* _KERNEL */

/*
 * SHA2Update()
 *
 * purpose: continues an SHA2 digest operation, using the message block
 *          to update the context.
 *   input: SHA2_CTX *	: the context to update
 *          void *	: the message block
 *          size_t      : the length of the message block, in bytes
 *  output: void
 */

void
SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
{
	uint32_t	i, buf_index, buf_len, buf_limit;
	const uint8_t	*input = inptr;
	uint32_t	algotype = ctx->algotype;
#if defined(__amd64)
	uint32_t	block_count;
#endif	/* __amd64 */


	/* check for noop */
	if (input_len == 0)
		return;

	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		buf_limit = 64;

		/* compute number of bytes mod 64 */
		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;

		/* update number of bits */
		if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
			ctx->count.c32[0]++;

		ctx->count.c32[0] += (input_len >> 29);

	} else {
		buf_limit = 128;

		/* compute number of bytes mod 128 */
		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;

		/* update number of bits */
		if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
			ctx->count.c64[0]++;

		ctx->count.c64[0] += (input_len >> 61);
	}

	buf_len = buf_limit - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and SHA2Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * SHA2Update().
		 */
		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
				SHA256Transform(ctx, ctx->buf_un.buf8);
			else
				SHA512Transform(ctx, ctx->buf_un.buf8);

			i = buf_len;
		}

#if !defined(__amd64)
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
				SHA256Transform(ctx, &input[i]);
			}
		} else {
			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
				SHA512Transform(ctx, &input[i]);
			}
		}

#else
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			block_count = (input_len - i) >> 6;
			if (block_count > 0) {
				SHA256TransformBlocks(ctx, &input[i],
				    block_count);
				i += block_count << 6;
			}
		} else {
			block_count = (input_len - i) >> 7;
			if (block_count > 0) {
				SHA512TransformBlocks(ctx, &input[i],
				    block_count);
				i += block_count << 7;
			}
		}
#endif	/* !__amd64 */

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this case
		 * will be an expensive noop.
		 */

		if (input_len == i)
			return;

		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}


/*
 * SHA2Final()
 *
 * purpose: ends an SHA2 digest operation, finalizing the message digest and
 *          zeroing the context.
 *   input: uchar_t *	: a buffer to store the digest
 *			: The function actually uses void* because many
 *			: callers pass things other than uchar_t here.
 *          SHA2_CTX *  : the context to finalize, save, and zero
 *  output: void
 */

void
SHA2Final(void *digest, SHA2_CTX *ctx)
{
	uint8_t		bitcount_be[sizeof (ctx->count.c32)];
	uint8_t		bitcount_be64[sizeof (ctx->count.c64)];
	uint32_t	index;
	uint32_t	algotype = ctx->algotype;

	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		index  = (ctx->count.c32[1] >> 3) & 0x3f;
		Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
		SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
		SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
	} else {
		index  = (ctx->count.c64[1] >> 3) & 0x7f;
		Encode64(bitcount_be64, ctx->count.c64,
		    sizeof (bitcount_be64));
		SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
		SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
		if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
			ctx->state.s64[6] = ctx->state.s64[7] = 0;
			Encode64(digest, ctx->state.s64,
			    sizeof (uint64_t) * 6);
		} else if (algotype == SHA512_224_MECH_INFO_TYPE) {
			uint8_t last[sizeof (uint64_t)];
			/*
			 * Since SHA-512/224 doesn't align well to 64-bit
			 * boundaries, we must do the encoding in three steps:
			 * 1) encode the three 64-bit words that fit neatly
			 * 2) encode the last 64-bit word to a temp buffer
			 * 3) take the first four bytes (the upper 32 bits) of
			 *    that temp buffer and append them to the digest
			 */
			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
			Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
			bcopy(last, (uint8_t *)digest + 24, 4);
		} else if (algotype == SHA512_256_MECH_INFO_TYPE) {
			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
		} else {
			Encode64(digest, ctx->state.s64,
			    sizeof (ctx->state.s64));
		}
	}

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}
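
/*
 * Example usage (an illustrative sketch only, not part of this file's API
 * surface): a userland caller would typically drive these entry points as
 * shown below.  SHA256_DIGEST_LENGTH and the SHA256 mechanism constant are
 * assumed to be provided by <sys/sha2.h>.
 *
 *	SHA2_CTX ctx;
 *	uint8_t digest[SHA256_DIGEST_LENGTH];
 *
 *	SHA2Init(SHA256, &ctx);
 *	SHA2Update(&ctx, "abc", 3);
 *	SHA2Final(digest, &ctx);
 */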