xref: /illumos-gate/usr/src/common/crypto/sha2/sha2.c (revision 734b6a94)
1 /*
2  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 #pragma ident	"%Z%%M%	%I%	%E% SMI"
7 
8 
9 /*
10  * The basic framework for this code came from the reference
11  * implementation for MD5.  That implementation is Copyright (C)
12  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
13  *
14  * License to copy and use this software is granted provided that it
15  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
16  * Algorithm" in all material mentioning or referencing this software
17  * or this function.
18  *
19  * License is also granted to make and use derivative works provided
20  * that such works are identified as "derived from the RSA Data
21  * Security, Inc. MD5 Message-Digest Algorithm" in all material
22  * mentioning or referencing the derived work.
23  *
24  * RSA Data Security, Inc. makes no representations concerning either
25  * the merchantability of this software or the suitability of this
26  * software for any particular purpose. It is provided "as is"
27  * without express or implied warranty of any kind.
28  *
29  * These notices must be retained in any copies of any part of this
30  * documentation and/or software.
31  *
32  * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
33  * standard (available at http://www.itl.nist.gov/div897/pubs/fip180-2.htm).
34  * Not as fast as one would like -- further optimizations are encouraged
35  * and appreciated.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysmacros.h>
42 #define	_SHA2_IMPL
43 #include <sys/sha2.h>
44 #include <sys/sha2_consts.h>
45 
46 #ifndef _KERNEL
47 
48 #include <strings.h>
49 #include <stdlib.h>
50 #include <errno.h>
51 
52 #pragma weak SHA256Update = SHA2Update
53 #pragma weak SHA384Update = SHA2Update
54 #pragma weak SHA512Update = SHA2Update
55 
56 #pragma weak SHA256Final = SHA2Final
57 #pragma weak SHA384Final = SHA2Final
58 #pragma weak SHA512Final = SHA2Final
59 
60 #endif	/* !_KERNEL */
61 
62 #ifdef _KERNEL
63 #include <sys/cmn_err.h>
64 #endif /* _KERNEL */
65 
66 static void Encode(uint8_t *, uint32_t *, size_t);
67 static void Encode64(uint8_t *, uint64_t *, size_t);
68 static void SHA256Transform(SHA2_CTX *, const uint8_t *);
69 static void SHA512Transform(SHA2_CTX *, const uint8_t *);
70 
71 static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
72 
/*
 * Ch and Maj are the basic SHA2 functions.  Every use of an argument is
 * parenthesized (including the operand of ~) so that arbitrary expressions,
 * not just plain identifiers, may be passed.
 */
#define	Ch(b, c, d)	(((b) & (c)) ^ ((~(b)) & (d)))
#define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))

/*
 * Rotates x right n bits.  The word width is taken from sizeof (x), so x
 * must already have the type being rotated, and n must be strictly between
 * 0 and that width (a shift by the full width is undefined in C).
 */
#define	ROTR(x, n)	\
	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))

/* Shift x right n bits */
#define	SHR(x, n)	((x) >> (n))

/* SHA256 Functions */
#define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))

/*
 * One SHA-256 round.  a..h name the eight working variables in their
 * current rotation, i is the round number handed to SHA256_CONST() and w
 * is the message-schedule word for the round.  Only d and h are written.
 * The caller must have T1 and T2 in scope as scratch variables.  The body
 * is wrapped in do/while (0) so the macro behaves as a single statement.
 */
#define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = (h) + BIGSIGMA1_256(e) + Ch(e, f, g) +		\
		    SHA256_CONST(i) + (w);				\
		(d) += T1;						\
		T2 = BIGSIGMA0_256(a) + Maj(a, b, c);			\
		(h) = T1 + T2;						\
	} while (0)

/* SHA384/512 Functions */
#define	BIGSIGMA0(x)	(ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#define	BIGSIGMA1(x)	(ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#define	SIGMA0(x)	(ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
#define	SIGMA1(x)	(ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))

/*
 * One SHA-384/512 round; same contract as SHA256ROUND but built from the
 * 64-bit sigma functions and SHA512_CONST().
 */
#define	SHA512ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = (h) + BIGSIGMA1(e) + Ch(e, f, g) +			\
		    SHA512_CONST(i) + (w);				\
		(d) += T1;						\
		T2 = BIGSIGMA0(a) + Maj(a, b, c);			\
		(h) = T1 + T2;						\
	} while (0)
106 
/*
 * Big-endian loads.
 *
 * sparc optimization:
 *
 * on the sparc, we can load big endian 32-bit and 64-bit data easily.
 * note that special care must be taken to ensure the address is suitably
 * aligned for the word size.  in the interest of speed, we don't check to
 * make sure, since careful programming can guarantee this for us.
 *
 * The portable variants assemble the value one byte at a time from a
 * uint8_t pointer.  Each byte is widened to the unsigned result type
 * before it is shifted: shifting the int-promoted byte left by 24 would
 * overflow a signed int whenever the top bit of the byte is set.
 */

#if	defined(_BIG_ENDIAN)

#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
#define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))

#else	/* little endian -- will work on big endian, but slowly */

#define	LOAD_BIG_32(addr)	\
	(((uint32_t)(addr)[0] << 24) | ((uint32_t)(addr)[1] << 16) |	\
	    ((uint32_t)(addr)[2] << 8) | (uint32_t)(addr)[3])

#define	LOAD_BIG_64(addr)	\
	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |	\
	    ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) |	\
	    ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) |	\
	    ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])

#endif
140 
141 
142 /* SHA256 Transform */
143 
144 static void
145 SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
146 {
147 
148 	uint32_t a = ctx->state.s32[0];
149 	uint32_t b = ctx->state.s32[1];
150 	uint32_t c = ctx->state.s32[2];
151 	uint32_t d = ctx->state.s32[3];
152 	uint32_t e = ctx->state.s32[4];
153 	uint32_t f = ctx->state.s32[5];
154 	uint32_t g = ctx->state.s32[6];
155 	uint32_t h = ctx->state.s32[7];
156 
157 	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
158 	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
159 	uint32_t T1, T2;
160 
161 #if	defined(__sparc)
162 	static const uint32_t sha256_consts[] = {
163 		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
164 		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
165 		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
166 		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
167 		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
168 		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
169 		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
170 		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
171 		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
172 		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
173 		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
174 		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
175 		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
176 		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
177 		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
178 		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
179 		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
180 		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
181 		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
182 		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
183 		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
184 		SHA256_CONST_63
185 	};
186 #endif
187 
188 	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
189 		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
190 		blk = (uint8_t *)ctx->buf_un.buf32;
191 	}
192 
193 	/* LINTED E_BAD_PTR_CAST_ALIGN */
194 	w0 =  LOAD_BIG_32(blk + 4 * 0);
195 	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
196 	/* LINTED E_BAD_PTR_CAST_ALIGN */
197 	w1 =  LOAD_BIG_32(blk + 4 * 1);
198 	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
199 	/* LINTED E_BAD_PTR_CAST_ALIGN */
200 	w2 =  LOAD_BIG_32(blk + 4 * 2);
201 	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
202 	/* LINTED E_BAD_PTR_CAST_ALIGN */
203 	w3 =  LOAD_BIG_32(blk + 4 * 3);
204 	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
205 	/* LINTED E_BAD_PTR_CAST_ALIGN */
206 	w4 =  LOAD_BIG_32(blk + 4 * 4);
207 	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
208 	/* LINTED E_BAD_PTR_CAST_ALIGN */
209 	w5 =  LOAD_BIG_32(blk + 4 * 5);
210 	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
211 	/* LINTED E_BAD_PTR_CAST_ALIGN */
212 	w6 =  LOAD_BIG_32(blk + 4 * 6);
213 	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
214 	/* LINTED E_BAD_PTR_CAST_ALIGN */
215 	w7 =  LOAD_BIG_32(blk + 4 * 7);
216 	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
217 	/* LINTED E_BAD_PTR_CAST_ALIGN */
218 	w8 =  LOAD_BIG_32(blk + 4 * 8);
219 	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
220 	/* LINTED E_BAD_PTR_CAST_ALIGN */
221 	w9 =  LOAD_BIG_32(blk + 4 * 9);
222 	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
223 	/* LINTED E_BAD_PTR_CAST_ALIGN */
224 	w10 =  LOAD_BIG_32(blk + 4 * 10);
225 	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
226 	/* LINTED E_BAD_PTR_CAST_ALIGN */
227 	w11 =  LOAD_BIG_32(blk + 4 * 11);
228 	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
229 	/* LINTED E_BAD_PTR_CAST_ALIGN */
230 	w12 =  LOAD_BIG_32(blk + 4 * 12);
231 	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
232 	/* LINTED E_BAD_PTR_CAST_ALIGN */
233 	w13 =  LOAD_BIG_32(blk + 4 * 13);
234 	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
235 	/* LINTED E_BAD_PTR_CAST_ALIGN */
236 	w14 =  LOAD_BIG_32(blk + 4 * 14);
237 	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
238 	/* LINTED E_BAD_PTR_CAST_ALIGN */
239 	w15 =  LOAD_BIG_32(blk + 4 * 15);
240 	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
241 
242 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
243 	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
244 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
245 	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
246 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
247 	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
248 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
249 	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
250 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
251 	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
252 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
253 	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
254 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
255 	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
256 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
257 	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
258 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
259 	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
260 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
261 	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
262 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
263 	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
264 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
265 	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
266 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
267 	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
268 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
269 	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
270 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
271 	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
272 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
273 	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
274 
275 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
276 	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
277 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
278 	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
279 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
280 	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
281 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
282 	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
283 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
284 	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
285 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
286 	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
287 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
288 	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
289 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
290 	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
291 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
292 	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
293 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
294 	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
295 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
296 	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
297 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
298 	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
299 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
300 	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
301 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
302 	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
303 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
304 	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
305 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
306 	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
307 
308 	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
309 	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
310 	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
311 	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
312 	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
313 	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
314 	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
315 	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
316 	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
317 	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
318 	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
319 	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
320 	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
321 	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
322 	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
323 	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
324 	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
325 	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
326 	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
327 	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
328 	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
329 	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
330 	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
331 	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
332 	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
333 	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
334 	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
335 	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
336 	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
337 	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
338 	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
339 	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
340 
341 	ctx->state.s32[0] += a;
342 	ctx->state.s32[1] += b;
343 	ctx->state.s32[2] += c;
344 	ctx->state.s32[3] += d;
345 	ctx->state.s32[4] += e;
346 	ctx->state.s32[5] += f;
347 	ctx->state.s32[6] += g;
348 	ctx->state.s32[7] += h;
349 }
350 
351 
352 /* SHA384 and SHA512 Transform */
353 
354 static void
355 SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
356 {
357 
358 	uint64_t a = ctx->state.s64[0];
359 	uint64_t b = ctx->state.s64[1];
360 	uint64_t c = ctx->state.s64[2];
361 	uint64_t d = ctx->state.s64[3];
362 	uint64_t e = ctx->state.s64[4];
363 	uint64_t f = ctx->state.s64[5];
364 	uint64_t g = ctx->state.s64[6];
365 	uint64_t h = ctx->state.s64[7];
366 
367 	uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
368 	uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
369 	uint64_t T1, T2;
370 
371 #if	defined(__sparc)
372 	static const uint64_t sha512_consts[] = {
373 		SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
374 		SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
375 		SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
376 		SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
377 		SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
378 		SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
379 		SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
380 		SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
381 		SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
382 		SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
383 		SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
384 		SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
385 		SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
386 		SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
387 		SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
388 		SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
389 		SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
390 		SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
391 		SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
392 		SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
393 		SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
394 		SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
395 		SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
396 		SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
397 		SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
398 		SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
399 		SHA512_CONST_78, SHA512_CONST_79
400 	};
401 #endif
402 
403 
404 	if ((uintptr_t)blk & 0x7) {		/* not 8-byte aligned? */
405 		bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
406 		blk = (uint8_t *)ctx->buf_un.buf64;
407 	}
408 
409 	/* LINTED E_BAD_PTR_CAST_ALIGN */
410 	w0 =  LOAD_BIG_64(blk + 8 * 0);
411 	SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
412 	/* LINTED E_BAD_PTR_CAST_ALIGN */
413 	w1 =  LOAD_BIG_64(blk + 8 * 1);
414 	SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
415 	/* LINTED E_BAD_PTR_CAST_ALIGN */
416 	w2 =  LOAD_BIG_64(blk + 8 * 2);
417 	SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
418 	/* LINTED E_BAD_PTR_CAST_ALIGN */
419 	w3 =  LOAD_BIG_64(blk + 8 * 3);
420 	SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
421 	/* LINTED E_BAD_PTR_CAST_ALIGN */
422 	w4 =  LOAD_BIG_64(blk + 8 * 4);
423 	SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
424 	/* LINTED E_BAD_PTR_CAST_ALIGN */
425 	w5 =  LOAD_BIG_64(blk + 8 * 5);
426 	SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
427 	/* LINTED E_BAD_PTR_CAST_ALIGN */
428 	w6 =  LOAD_BIG_64(blk + 8 * 6);
429 	SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
430 	/* LINTED E_BAD_PTR_CAST_ALIGN */
431 	w7 =  LOAD_BIG_64(blk + 8 * 7);
432 	SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
433 	/* LINTED E_BAD_PTR_CAST_ALIGN */
434 	w8 =  LOAD_BIG_64(blk + 8 * 8);
435 	SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
436 	/* LINTED E_BAD_PTR_CAST_ALIGN */
437 	w9 =  LOAD_BIG_64(blk + 8 * 9);
438 	SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
439 	/* LINTED E_BAD_PTR_CAST_ALIGN */
440 	w10 =  LOAD_BIG_64(blk + 8 * 10);
441 	SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
442 	/* LINTED E_BAD_PTR_CAST_ALIGN */
443 	w11 =  LOAD_BIG_64(blk + 8 * 11);
444 	SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
445 	/* LINTED E_BAD_PTR_CAST_ALIGN */
446 	w12 =  LOAD_BIG_64(blk + 8 * 12);
447 	SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
448 	/* LINTED E_BAD_PTR_CAST_ALIGN */
449 	w13 =  LOAD_BIG_64(blk + 8 * 13);
450 	SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
451 	/* LINTED E_BAD_PTR_CAST_ALIGN */
452 	w14 =  LOAD_BIG_64(blk + 8 * 14);
453 	SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
454 	/* LINTED E_BAD_PTR_CAST_ALIGN */
455 	w15 =  LOAD_BIG_64(blk + 8 * 15);
456 	SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
457 
458 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
459 	SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
460 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
461 	SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
462 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
463 	SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
464 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
465 	SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
466 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
467 	SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
468 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
469 	SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
470 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
471 	SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
472 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
473 	SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
474 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
475 	SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
476 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
477 	SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
478 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
479 	SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
480 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
481 	SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
482 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
483 	SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
484 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
485 	SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
486 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
487 	SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
488 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
489 	SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
490 
491 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
492 	SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
493 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
494 	SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
495 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
496 	SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
497 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
498 	SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
499 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
500 	SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
501 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
502 	SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
503 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
504 	SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
505 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
506 	SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
507 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
508 	SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
509 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
510 	SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
511 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
512 	SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
513 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
514 	SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
515 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
516 	SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
517 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
518 	SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
519 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
520 	SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
521 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
522 	SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
523 
524 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
525 	SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
526 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
527 	SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
528 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
529 	SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
530 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
531 	SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
532 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
533 	SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
534 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
535 	SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
536 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
537 	SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
538 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
539 	SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
540 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
541 	SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
542 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
543 	SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
544 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
545 	SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
546 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
547 	SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
548 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
549 	SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
550 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
551 	SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
552 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
553 	SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
554 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
555 	SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
556 
557 	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
558 	SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
559 	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
560 	SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
561 	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
562 	SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
563 	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
564 	SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
565 	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
566 	SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
567 	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
568 	SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
569 	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
570 	SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
571 	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
572 	SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
573 	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
574 	SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
575 	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
576 	SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
577 	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
578 	SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
579 	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
580 	SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
581 	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
582 	SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
583 	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
584 	SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
585 	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
586 	SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
587 	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
588 	SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
589 
590 	ctx->state.s64[0] += a;
591 	ctx->state.s64[1] += b;
592 	ctx->state.s64[2] += c;
593 	ctx->state.s64[3] += d;
594 	ctx->state.s64[4] += e;
595 	ctx->state.s64[5] += f;
596 	ctx->state.s64[6] += g;
597 	ctx->state.s64[7] += h;
598 
599 }
600 
601 
/*
 * Encode()
 *
 * purpose: to store a list of host-order 32-bit numbers as a big endian
 *          byte string
 *   input: uint8_t *	: place to store the converted big endian numbers
 *	    uint32_t *	: place to get numbers to convert from
 *          size_t	: the number of bytes to produce
 *  output: void
 *
 * Exactly len bytes are written.  If len is not a multiple of four, the
 * final partial word contributes only its most significant bytes, so the
 * output buffer is never overrun.  input must hold at least
 * (len + 3) / 4 words.
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t		nwords = len / sizeof (uint32_t);
	size_t		ntail = len % sizeof (uint32_t);
	size_t		i, k;

#if	defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
		/* host order is already big endian: store whole words */
		for (i = 0; i < nwords; i++) {
			/* LINTED: pointer alignment */
			((uint32_t *)output)[i] = input[i];
		}
	} else {
#endif	/* little endian -- will work on big endian, but slowly */
		for (i = 0; i < nwords; i++) {
			uint8_t *dst = output + i * sizeof (uint32_t);

			dst[0] = (uint8_t)(input[i] >> 24);
			dst[1] = (uint8_t)(input[i] >> 16);
			dst[2] = (uint8_t)(input[i] >>  8);
			dst[3] = (uint8_t)input[i];
		}
#if	defined(__sparc)
	}
#endif

	/* leading bytes of a trailing partial word, most significant first */
	for (k = 0; k < ntail; k++) {
		output[nwords * sizeof (uint32_t) + k] =
		    (uint8_t)(input[nwords] >> (24 - 8 * k));
	}
}
636 
/*
 * Encode64()
 *
 * purpose: to store a list of host-order 64-bit numbers as a big endian
 *          byte string
 *   input: uint8_t *	: place to store the converted big endian numbers
 *	    uint64_t *	: place to get numbers to convert from
 *          size_t	: the number of bytes to produce
 *  output: void
 *
 * Exactly len bytes are written.  If len is not a multiple of eight, the
 * final partial word contributes only its most significant bytes, so the
 * output buffer is never overrun.  input must hold at least
 * (len + 7) / 8 words.
 */
static void
Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t		nwords = len / sizeof (uint64_t);
	size_t		ntail = len % sizeof (uint64_t);
	size_t		i, k;

#if	defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
		/* host order is already big endian: store whole words */
		for (i = 0; i < nwords; i++) {
			/* LINTED: pointer alignment */
			((uint64_t *)output)[i] = input[i];
		}
	} else {
#endif	/* little endian -- will work on big endian, but slowly */
		for (i = 0; i < nwords; i++) {
			uint8_t *dst = output + i * sizeof (uint64_t);

			dst[0] = (uint8_t)(input[i] >> 56);
			dst[1] = (uint8_t)(input[i] >> 48);
			dst[2] = (uint8_t)(input[i] >> 40);
			dst[3] = (uint8_t)(input[i] >> 32);
			dst[4] = (uint8_t)(input[i] >> 24);
			dst[5] = (uint8_t)(input[i] >> 16);
			dst[6] = (uint8_t)(input[i] >>  8);
			dst[7] = (uint8_t)input[i];
		}
#if	defined(__sparc)
	}
#endif

	/* leading bytes of a trailing partial word, most significant first */
	for (k = 0; k < ntail; k++) {
		output[nwords * sizeof (uint64_t) + k] =
		    (uint8_t)(input[nwords] >> (56 - 8 * k));
	}
}
666 
667 
668 void
669 SHA2Init(uint64_t mech, SHA2_CTX *ctx)
670 {
671 
672 	switch (mech) {
673 	case SHA256_MECH_INFO_TYPE:
674 	case SHA256_HMAC_MECH_INFO_TYPE:
675 	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
676 		ctx->state.s32[0] = 0x6a09e667U;
677 		ctx->state.s32[1] = 0xbb67ae85U;
678 		ctx->state.s32[2] = 0x3c6ef372U;
679 		ctx->state.s32[3] = 0xa54ff53aU;
680 		ctx->state.s32[4] = 0x510e527fU;
681 		ctx->state.s32[5] = 0x9b05688cU;
682 		ctx->state.s32[6] = 0x1f83d9abU;
683 		ctx->state.s32[7] = 0x5be0cd19U;
684 		break;
685 	case SHA384_MECH_INFO_TYPE:
686 	case SHA384_HMAC_MECH_INFO_TYPE:
687 	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
688 		ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
689 		ctx->state.s64[1] = 0x629a292a367cd507ULL;
690 		ctx->state.s64[2] = 0x9159015a3070dd17ULL;
691 		ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
692 		ctx->state.s64[4] = 0x67332667ffc00b31ULL;
693 		ctx->state.s64[5] = 0x8eb44a8768581511ULL;
694 		ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
695 		ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
696 		break;
697 	case SHA512_MECH_INFO_TYPE:
698 	case SHA512_HMAC_MECH_INFO_TYPE:
699 	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
700 		ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
701 		ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
702 		ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
703 		ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
704 		ctx->state.s64[4] = 0x510e527fade682d1ULL;
705 		ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
706 		ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
707 		ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
708 		break;
709 #ifdef _KERNEL
710 	default:
711 		cmn_err(CE_PANIC, "sha2_init: "
712 		    "failed to find a supported algorithm: 0x%x",
713 		    (uint32_t)mech);
714 
715 #endif /* _KERNEL */
716 	}
717 
718 	ctx->algotype = mech;
719 	ctx->count.c64[0] = ctx->count.c64[1] = 0;
720 }
721 
722 #ifndef _KERNEL
723 
/*
 * Userland-only convenience initializers.  Each one forwards to SHA2Init()
 * with the matching mechanism constant, so the caller does not have to
 * name the mechanism itself.
 */
724 #pragma inline(SHA256Init, SHA384Init, SHA512Init)
/* Initialize ctx through SHA2Init() with the SHA256 mechanism. */
725 void
726 SHA256Init(SHA256_CTX *ctx)
727 {
728 	SHA2Init(SHA256, ctx);
729 }
730 
/* Initialize ctx through SHA2Init() with the SHA384 mechanism. */
731 void
732 SHA384Init(SHA384_CTX *ctx)
733 {
734 	SHA2Init(SHA384, ctx);
735 }
736 
/* Initialize ctx through SHA2Init() with the SHA512 mechanism. */
737 void
738 SHA512Init(SHA512_CTX *ctx)
739 {
740 	SHA2Init(SHA512, ctx);
741 }
742 
743 #endif /* !_KERNEL */
744 
745 /*
746  * SHA2Update()
747  *
748  * purpose: continues an sha2 digest operation, using the message block
749  *          to update the context.
750  *   input: SHA2_CTX *	: the context to update
751  *          void *	: the message block
752  *          size_t    : the length of the message block in bytes
753  *  output: void
754  */
755 
756 void
757 SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
758 {
759 	uint32_t i, buf_index, buf_len, buf_limit;
760 	const uint8_t *input = inptr;
761 
762 	/* check for noop */
763 	if (input_len == 0)
764 		return;
765 
766 	if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
767 		buf_limit = 64;
768 
769 		/* compute number of bytes mod 64 */
770 		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
771 
772 		/* update number of bits */
773 		if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
774 			ctx->count.c32[0]++;
775 
776 		ctx->count.c32[0] += (input_len >> 29);
777 
778 	} else {
779 		buf_limit = 128;
780 
781 		/* compute number of bytes mod 128 */
782 		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
783 
784 		/* update number of bits */
785 		if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
786 			ctx->count.c64[0]++;
787 
788 		ctx->count.c64[0] += (input_len >> 29);
789 	}
790 
791 	buf_len = buf_limit - buf_index;
792 
793 	/* transform as many times as possible */
794 	i = 0;
795 	if (input_len >= buf_len) {
796 
797 		/*
798 		 * general optimization:
799 		 *
800 		 * only do initial bcopy() and SHA2Transform() if
801 		 * buf_index != 0.  if buf_index == 0, we're just
802 		 * wasting our time doing the bcopy() since there
803 		 * wasn't any data left over from a previous call to
804 		 * SHA2Update().
805 		 */
806 		if (buf_index) {
807 			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
808 			if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
809 				SHA256Transform(ctx, ctx->buf_un.buf8);
810 			else
811 				SHA512Transform(ctx, ctx->buf_un.buf8);
812 
813 			i = buf_len;
814 		}
815 
816 
817 		for (; i + buf_limit - 1 < input_len; i += buf_limit) {
818 			if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
819 				SHA256Transform(ctx, &input[i]);
820 			else
821 				SHA512Transform(ctx, &input[i]);
822 		}
823 
824 		/*
825 		 * general optimization:
826 		 *
827 		 * if i and input_len are the same, return now instead
828 		 * of calling bcopy(), since the bcopy() in this case
829 		 * will be an expensive nop.
830 		 */
831 
832 		if (input_len == i)
833 			return;
834 
835 		buf_index = 0;
836 	}
837 
838 	/* buffer remaining input */
839 	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
840 }
841 
842 
843 /*
844  * SHA2Final()
845  *
846  * purpose: ends an sha2 digest operation, finalizing the message digest and
847  *          zeroing the context.
848  *   input: uint8_t *	: a buffer to store the digest in
849  *          SHA2_CTX *  : the context to finalize, save, and zero
850  *  output: void
851  */
852 
853 void
854 SHA2Final(void *digest, SHA2_CTX *ctx)
855 {
856 	uint8_t		bitcount_be[sizeof (ctx->count.c32)];
857 	uint8_t		bitcount_be64[sizeof (ctx->count.c64)];
858 	uint32_t	index;
859 
860 
861 	if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
862 		index  = (ctx->count.c32[1] >> 3) & 0x3f;
863 		Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
864 		SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
865 		SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
866 		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
867 
868 	} else {
869 		index  = (ctx->count.c64[1] >> 3) & 0x7f;
870 		Encode64(bitcount_be64, ctx->count.c64,
871 		    sizeof (bitcount_be64));
872 		SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
873 		SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
874 		if (ctx->algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
875 			ctx->state.s64[6] = ctx->state.s64[7] = 0;
876 			Encode64(digest, ctx->state.s64,
877 			    sizeof (uint64_t) * 6);
878 		} else
879 			Encode64(digest, ctx->state.s64,
880 			    sizeof (ctx->state.s64));
881 	}
882 
883 	/* zeroize sensitive information */
884 	bzero(ctx, sizeof (*ctx));
885 }
886