xref: /illumos-gate/usr/src/common/crypto/ecc/ecp_256.c (revision f9fbec18)
1 /*
2  * ***** BEGIN LICENSE BLOCK *****
3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4  *
5  * The contents of this file are subject to the Mozilla Public License Version
6  * 1.1 (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  * http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the
13  * License.
14  *
15  * The Original Code is the elliptic curve math library for prime field curves.
16  *
17  * The Initial Developer of the Original Code is
18  * Sun Microsystems, Inc.
19  * Portions created by the Initial Developer are Copyright (C) 2003
20  * the Initial Developer. All Rights Reserved.
21  *
22  * Contributor(s):
23  *   Douglas Stebila <douglas@stebila.ca>
24  *
25  * Alternatively, the contents of this file may be used under the terms of
26  * either the GNU General Public License Version 2 or later (the "GPL"), or
27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28  * in which case the provisions of the GPL or the LGPL are applicable instead
29  * of those above. If you wish to allow use of your version of this file only
30  * under the terms of either the GPL or the LGPL, and not to allow others to
31  * use your version of this file under the terms of the MPL, indicate your
32  * decision by deleting the provisions above and replace them with the notice
33  * and other provisions required by the GPL or the LGPL. If you do not delete
34  * the provisions above, a recipient may use your version of this file under
35  * the terms of any one of the MPL, the GPL or the LGPL.
36  *
37  * ***** END LICENSE BLOCK ***** */
38 /*
39  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
40  * Use is subject to license terms.
41  *
42  * Sun elects to use this software under the MPL license.
43  */
44 
45 #pragma ident	"%Z%%M%	%I%	%E% SMI"
46 
47 #include "ecp.h"
48 #include "mpi.h"
49 #include "mplogic.h"
50 #include "mpi-priv.h"
51 #ifndef _KERNEL
52 #include <stdlib.h>
53 #endif
54 
55 /* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1.
56  * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to
57  * Elliptic Curve Cryptography.
 *
 * a    - value to reduce.
 * r    - receives the result; may be the same mp_int as a.
 * meth - field method; only meth->irr is read here (as the modulus for the
 *        generic mp_mod() fallback and for the final conditional subtraction).
 *
 * Three cases, selected by the bit length of a:
 *   - fewer than 256 bits: a is handed back unchanged (copied if r != a);
 *   - more than 512 bits:  generic mp_mod() by meth->irr;
 *   - otherwise:           the digit-level fast path below.
 *
 * Returns res, which is MP_OKAY unless a wrapped call fails.  The
 * short-input case returns MP_OKAY or mp_copy()'s result directly.
 * (MP_CHECKOK is defined outside this file; presumably it stores the callee's
 * error in res and jumps to CLEANUP.)
 *
 * NOTE(review): the fast path never looks at the sign of a and always marks
 * the result MP_ZPOS; callers presumably pass non-negative values - confirm.
 */
58 mp_err
59 ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
60 {
61 	mp_err res = MP_OKAY;
62 	mp_size a_used = MP_USED(a);
	/* bit length of a; picks between copy, fast path and mp_mod below */
63 	int a_bits = mpl_significant_bits(a);
	/* carry/borrow threaded through each MP_ADD_CARRY/MP_SUB_BORROW chain */
64 	mp_digit carry;
65 
66 #ifdef ECL_THIRTY_TWO_BIT
	/* a8..a15: upper digits of a; digits that a does not have stay 0 */
67 	mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0;
	/* r0..r7: running 8-digit result, seeded from the low digits of a */
68 	mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
	/* r8 accumulates carries (+) and borrows (-) out of digit 7, so it
	 * can legitimately be negative */
69 	int r8; /* must be a signed value ! */
70 #else
	/* a4..a7: upper digits of a; digits that a does not have stay 0 */
71 	mp_digit a4=0, a5=0, a6=0, a7=0;
	/* aNh = aN >> 32 and aNl = aN << 32, assigned on the fast path */
72 	mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
	/* r0..r3: running 4-digit result, seeded from the low digits of a */
73 	mp_digit r0, r1, r2, r3;
	/* r4 accumulates carries (+) and borrows (-) out of digit 3 */
74 	int r4; /* must be a signed value ! */
75 #endif
76 	/* fewer than 256 significant bits: a non-negative value this short is
77 	 * below p256, so hand it back as is (copied when r is not a) */
78 	if (a_bits < 256) {
79 		if (a == r) return MP_OKAY;
80 		return mp_copy(a,r);
81 	}
	/* for inputs wider than twice the field size use regular reduction */
82 	if (a_bits > 512)  {
83 		MP_CHECKOK(mp_mod(a, &meth->irr, r));
84 	} else {
85 
86 #ifdef ECL_THIRTY_TWO_BIT
		/* Load the digits above the low eight.  Every case falls
		 * through on purpose, so entering at a_used loads digits
		 * a_used-1 down to 8; the rest keep their 0 initializer. */
87 		switch (a_used) {
88 		case 16:
89 			a15 = MP_DIGIT(a,15);
90 		case 15:
91 			a14 = MP_DIGIT(a,14);
92 		case 14:
93 			a13 = MP_DIGIT(a,13);
94 		case 13:
95 			a12 = MP_DIGIT(a,12);
96 		case 12:
97 			a11 = MP_DIGIT(a,11);
98 		case 11:
99 			a10 = MP_DIGIT(a,10);
100 		case 10:
101 			a9 = MP_DIGIT(a,9);
102 		case 9:
103 			a8 = MP_DIGIT(a,8);
104 		}
105 
		/* seed the result with the low 256 bits of a */
106 		r0 = MP_DIGIT(a,0);
107 		r1 = MP_DIGIT(a,1);
108 		r2 = MP_DIGIT(a,2);
109 		r3 = MP_DIGIT(a,3);
110 		r4 = MP_DIGIT(a,4);
111 		r5 = MP_DIGIT(a,5);
112 		r6 = MP_DIGIT(a,6);
113 		r7 = MP_DIGIT(a,7);
114 
		/* In the notes below digit vectors are written r7 first. */
115 		/* sum 1 */
		/* adds (a15,a14,a13,a12,a11,0,0,0), two times over */
116 		MP_ADD_CARRY(r3, a11, r3, 0,     carry);
117 		MP_ADD_CARRY(r4, a12, r4, carry, carry);
118 		MP_ADD_CARRY(r5, a13, r5, carry, carry);
119 		MP_ADD_CARRY(r6, a14, r6, carry, carry);
120 		MP_ADD_CARRY(r7, a15, r7, carry, carry);
		/* first chain initializes the overflow counter */
121 		r8 = carry;
122 		MP_ADD_CARRY(r3, a11, r3, 0,     carry);
123 		MP_ADD_CARRY(r4, a12, r4, carry, carry);
124 		MP_ADD_CARRY(r5, a13, r5, carry, carry);
125 		MP_ADD_CARRY(r6, a14, r6, carry, carry);
126 		MP_ADD_CARRY(r7, a15, r7, carry, carry);
127 		r8 += carry;
128 		/* sum 2 */
		/* adds (0,a15,a14,a13,a12,0,0,0) once; its second copy is
		 * merged into the next chain */
129 		MP_ADD_CARRY(r3, a12, r3, 0,     carry);
130 		MP_ADD_CARRY(r4, a13, r4, carry, carry);
131 		MP_ADD_CARRY(r5, a14, r5, carry, carry);
132 		MP_ADD_CARRY(r6, a15, r6, carry, carry);
133 		MP_ADD_CARRY(r7,   0, r7, carry, carry);
134 		r8 += carry;
135 		/* combine last bottom of sum 3 with second sum 2 */
		/* r0..r2 and r7 take (a15,-,0,0,0,a10,a9,a8) from sum 3;
		 * r3..r6 take the second (0,a15,a14,a13,a12,0,0,0) */
136 		MP_ADD_CARRY(r0, a8,  r0, 0,     carry);
137 		MP_ADD_CARRY(r1, a9,  r1, carry, carry);
138 		MP_ADD_CARRY(r2, a10, r2, carry, carry);
139 		MP_ADD_CARRY(r3, a12, r3, carry, carry);
140 		MP_ADD_CARRY(r4, a13, r4, carry, carry);
141 		MP_ADD_CARRY(r5, a14, r5, carry, carry);
142 		MP_ADD_CARRY(r6, a15, r6, carry, carry);
143 		MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */
144 		r8 += carry;
145 		/* sum 3 (rest of it)*/
		/* the one digit of sum 3 not yet added: a14 into r6 */
146 		MP_ADD_CARRY(r6, a14, r6, 0,     carry);
147 		MP_ADD_CARRY(r7,   0, r7, carry, carry);
148 		r8 += carry;
149 		/* sum 4 (rest of it)*/
		/* adds (a8,a13,a15,a14,a13,a11,a10,a9) */
150 		MP_ADD_CARRY(r0, a9,  r0, 0,     carry);
151 		MP_ADD_CARRY(r1, a10, r1, carry, carry);
152 		MP_ADD_CARRY(r2, a11, r2, carry, carry);
153 		MP_ADD_CARRY(r3, a13, r3, carry, carry);
154 		MP_ADD_CARRY(r4, a14, r4, carry, carry);
155 		MP_ADD_CARRY(r5, a15, r5, carry, carry);
156 		MP_ADD_CARRY(r6, a13, r6, carry, carry);
157 		MP_ADD_CARRY(r7, a8,  r7, carry, carry);
158 		r8 += carry;
159 		/* diff 5 */
		/* subtracts (a10,a8,0,0,0,a13,a12,a11); a borrow out of r7
		 * decrements r8 */
160 		MP_SUB_BORROW(r0, a11, r0, 0,     carry);
161 		MP_SUB_BORROW(r1, a12, r1, carry, carry);
162 		MP_SUB_BORROW(r2, a13, r2, carry, carry);
163 		MP_SUB_BORROW(r3,   0, r3, carry, carry);
164 		MP_SUB_BORROW(r4,   0, r4, carry, carry);
165 		MP_SUB_BORROW(r5,   0, r5, carry, carry);
166 		MP_SUB_BORROW(r6, a8,  r6, carry, carry);
167 		MP_SUB_BORROW(r7, a10, r7, carry, carry);
168 		r8 -= carry;
169 		/* diff 6 */
		/* subtracts (a11,a9,0,0,a15,a14,a13,a12) */
170 		MP_SUB_BORROW(r0, a12, r0, 0,     carry);
171 		MP_SUB_BORROW(r1, a13, r1, carry, carry);
172 		MP_SUB_BORROW(r2, a14, r2, carry, carry);
173 		MP_SUB_BORROW(r3, a15, r3, carry, carry);
174 		MP_SUB_BORROW(r4,   0, r4, carry, carry);
175 		MP_SUB_BORROW(r5,   0, r5, carry, carry);
176 		MP_SUB_BORROW(r6, a9,  r6, carry, carry);
177 		MP_SUB_BORROW(r7, a11, r7, carry, carry);
178 		r8 -= carry;
179 		/* diff 7 */
		/* subtracts (a12,0,a10,a9,a8,a15,a14,a13) */
180 		MP_SUB_BORROW(r0, a13, r0, 0,     carry);
181 		MP_SUB_BORROW(r1, a14, r1, carry, carry);
182 		MP_SUB_BORROW(r2, a15, r2, carry, carry);
183 		MP_SUB_BORROW(r3, a8,  r3, carry, carry);
184 		MP_SUB_BORROW(r4, a9,  r4, carry, carry);
185 		MP_SUB_BORROW(r5, a10, r5, carry, carry);
186 		MP_SUB_BORROW(r6, 0,   r6, carry, carry);
187 		MP_SUB_BORROW(r7, a12, r7, carry, carry);
188 		r8 -= carry;
189 		/* diff 8 */
		/* subtracts (a13,0,a11,a10,a9,0,a15,a14) */
190 		MP_SUB_BORROW(r0, a14, r0, 0,     carry);
191 		MP_SUB_BORROW(r1, a15, r1, carry, carry);
192 		MP_SUB_BORROW(r2, 0,   r2, carry, carry);
193 		MP_SUB_BORROW(r3, a9,  r3, carry, carry);
194 		MP_SUB_BORROW(r4, a10, r4, carry, carry);
195 		MP_SUB_BORROW(r5, a11, r5, carry, carry);
196 		MP_SUB_BORROW(r6, 0,   r6, carry, carry);
197 		MP_SUB_BORROW(r7, a13, r7, carry, carry);
198 		r8 -= carry;
199 
200 		/* reduce the overflows */
		/* r8 > 0 means r8 * 2^256 is still owed.  Since
		 * 2^256 mod p256 = 2^224 - 2^192 - 2^96 + 1, add r8 times
		 * that value; the eight addends below are its digits with
		 * the borrows of the negative terms already propagated.
		 * The loop repeats while the addition itself carries out. */
201 		while (r8 > 0) {
202 			mp_digit r8_d = r8;
203 			MP_ADD_CARRY(r0, r8_d,         r0, 0,     carry);
204 			MP_ADD_CARRY(r1, 0,            r1, carry, carry);
205 			MP_ADD_CARRY(r2, 0,            r2, carry, carry);
206 			MP_ADD_CARRY(r3, -r8_d,        r3, carry, carry);
207 			MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry);
208 			MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry);
209 			MP_ADD_CARRY(r6, -(r8_d+1),    r6, carry, carry);
210 			MP_ADD_CARRY(r7, (r8_d-1),     r7, carry, carry);
211 			r8 = carry;
212 		}
213 
214 		/* reduce the underflows */
		/* mirror image of the loop above: r8 < 0 means the running
		 * value went below zero, so subtract |r8| times the same
		 * constant until no borrow is left */
215 		while (r8 < 0) {
216 			mp_digit r8_d = -r8;
217 			MP_SUB_BORROW(r0, r8_d,         r0, 0,     carry);
218 			MP_SUB_BORROW(r1, 0,            r1, carry, carry);
219 			MP_SUB_BORROW(r2, 0,            r2, carry, carry);
220 			MP_SUB_BORROW(r3, -r8_d,        r3, carry, carry);
221 			MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry);
222 			MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry);
223 			MP_SUB_BORROW(r6, -(r8_d+1),    r6, carry, carry);
224 			MP_SUB_BORROW(r7, (r8_d-1),     r7, carry, carry);
225 			r8 = -carry;
226 		}
		/* when r is a itself it already holds at least 8 digits
		 * (a has 256 or more bits here), so only a distinct r is
		 * padded */
227 		if (a != r) {
228 			MP_CHECKOK(s_mp_pad(r,8));
229 		}
230 		MP_SIGN(r) = MP_ZPOS;
231 		MP_USED(r) = 8;
232 
233 		MP_DIGIT(r,7) = r7;
234 		MP_DIGIT(r,6) = r6;
235 		MP_DIGIT(r,5) = r5;
236 		MP_DIGIT(r,4) = r4;
237 		MP_DIGIT(r,3) = r3;
238 		MP_DIGIT(r,2) = r2;
239 		MP_DIGIT(r,1) = r1;
240 		MP_DIGIT(r,0) = r0;
241 
242 		/* final reduction if necessary */
		/* The test is a digit-wise "r >= p256".  With 32-bit digits
		 * p256 reads, r7 first: MAX, 1, 0, 0, 0, MAX, MAX, MAX
		 * (taking MP_DIGIT_MAX as the all-ones digit). */
243 		if ((r7 == MP_DIGIT_MAX) &&
244 			((r6 > 1) || ((r6 == 1) &&
245 			(r5 || r4 || r3 ||
246 				((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX)
247 				  && (r0 == MP_DIGIT_MAX)))))) {
248 			MP_CHECKOK(mp_sub(r, &meth->irr, r));
249 		}
		/* Disabled alternative clean-up; nothing in the visible part
		 * of this file defines notdef. */
250 #ifdef notdef
251 
252 
253 		/* smooth the negatives */
254 		while (MP_SIGN(r) != MP_ZPOS) {
255 			MP_CHECKOK(mp_add(r, &meth->irr, r));
256 		}
257 		while (MP_USED(r) > 8) {
258 			MP_CHECKOK(mp_sub(r, &meth->irr, r));
259 		}
260 
261 		/* final reduction if necessary */
262 		if (MP_DIGIT(r,7) >= MP_DIGIT(&meth->irr,7)) {
263 		    if (mp_cmp(r,&meth->irr) != MP_LT) {
264 			MP_CHECKOK(mp_sub(r, &meth->irr, r));
265 		    }
266 		}
267 #endif
		/* USED was forced to 8 above; presumably this trims leading
		 * zero digits */
268 		s_mp_clamp(r);
269 #else
		/* Same algorithm on wider digits: the shifts by 32 and the
		 * ULL constants below indicate a 64-bit mp_digit, each one
		 * holding two of the 32-bit words handled above. */
		/* Load the digits above the low four.  Every case falls
		 * through on purpose; digits a lacks keep their 0
		 * initializer. */
270 		switch (a_used) {
271 		case 8:
272 			a7 = MP_DIGIT(a,7);
273 		case 7:
274 			a6 = MP_DIGIT(a,6);
275 		case 6:
276 			a5 = MP_DIGIT(a,5);
277 		case 5:
278 			a4 = MP_DIGIT(a,4);
279 		}
		/* aNl: low half of aN moved into the high half (low half 0);
		 * aNh: high half of aN moved into the low half (high half 0).
		 * OR-ing an "h" with an "l" splices two halves into one digit. */
280 		a7l = a7 << 32;
281 		a7h = a7 >> 32;
282 		a6l = a6 << 32;
283 		a6h = a6 >> 32;
284 		a5l = a5 << 32;
285 		a5h = a5 >> 32;
286 		a4l = a4 << 32;
287 		a4h = a4 >> 32;
		/* seed the result with the low 256 bits of a */
288 		r3 = MP_DIGIT(a,3);
289 		r2 = MP_DIGIT(a,2);
290 		r1 = MP_DIGIT(a,1);
291 		r0 = MP_DIGIT(a,0);
292 
293 		/* sum 1 */
		/* adds (a7, a6, high half of a5 kept in place, 0), r3 first,
		 * two times over */
294 		MP_ADD_CARRY(r1, a5h << 32, r1, 0,     carry);
295 		MP_ADD_CARRY(r2, a6,        r2, carry, carry);
296 		MP_ADD_CARRY(r3, a7,        r3, carry, carry);
		/* first chain initializes the overflow counter */
297 		r4 = carry;
298 		MP_ADD_CARRY(r1, a5h << 32, r1, 0,     carry);
299 		MP_ADD_CARRY(r2, a6,        r2, carry, carry);
300 		MP_ADD_CARRY(r3, a7,        r3, carry, carry);
301 		r4 += carry;
302 		/* sum 2 */
		/* adds the a7:a6 pair shifted down by 32 bits, two times */
303 		MP_ADD_CARRY(r1, a6l,       r1, 0,     carry);
304 		MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
305 		MP_ADD_CARRY(r3, a7h,       r3, carry, carry);
306 		r4 += carry;
307 		MP_ADD_CARRY(r1, a6l,       r1, 0,     carry);
308 		MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
309 		MP_ADD_CARRY(r3, a7h,       r3, carry, carry);
310 		r4 += carry;
311 
312 		/* sum 3 */
		/* a5l >> 32 is just the low half of a5, back in the low
		 * position */
313 		MP_ADD_CARRY(r0, a4,        r0, 0,     carry);
314 		MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry);
315 		MP_ADD_CARRY(r2, 0,         r2, carry, carry);
316 		MP_ADD_CARRY(r3, a7,        r3, carry, carry);
317 		r4 += carry;
318 		/* sum 4 */
319 		MP_ADD_CARRY(r0, a4h | a5l,     r0, 0,     carry);
320 		MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry);
321 		MP_ADD_CARRY(r2, a7,            r2, carry, carry);
322 		MP_ADD_CARRY(r3, a6h | a4l,     r3, carry, carry);
323 		r4 += carry;
324 		/* diff 5 */
		/* from here on borrows out of r3 decrement r4 */
325 		MP_SUB_BORROW(r0, a5h | a6l,    r0, 0,     carry);
326 		MP_SUB_BORROW(r1, a6h,          r1, carry, carry);
327 		MP_SUB_BORROW(r2, 0,            r2, carry, carry);
328 		MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry);
329 		r4 -= carry;
330 		/* diff 6 */
331 		MP_SUB_BORROW(r0, a6,  		r0, 0,     carry);
332 		MP_SUB_BORROW(r1, a7,           r1, carry, carry);
333 		MP_SUB_BORROW(r2, 0,            r2, carry, carry);
334 		MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry);
335 		r4 -= carry;
336 		/* diff 7 */
337 		MP_SUB_BORROW(r0, a6h|a7l,	r0, 0,     carry);
338 		MP_SUB_BORROW(r1, a7h|a4l,      r1, carry, carry);
339 		MP_SUB_BORROW(r2, a4h|a5l,      r2, carry, carry);
340 		MP_SUB_BORROW(r3, a6l,          r3, carry, carry);
341 		r4 -= carry;
342 		/* diff 8 */
343 		MP_SUB_BORROW(r0, a7,	        r0, 0,     carry);
344 		MP_SUB_BORROW(r1, a4h<<32,      r1, carry, carry);
345 		MP_SUB_BORROW(r2, a5,           r2, carry, carry);
346 		MP_SUB_BORROW(r3, a6h<<32,      r3, carry, carry);
347 		r4 -= carry;
348 
349 		/* reduce the overflows */
		/* r4 > 0 means r4 * 2^256 is still owed.  Since
		 * 2^256 mod p256 = 2^224 - 2^192 - 2^96 + 1, add r4 times
		 * that value; the four addends are its digits with the
		 * borrows of the negative terms already propagated.  The
		 * loop repeats while the addition itself carries out. */
350 		while (r4 > 0) {
351 			mp_digit r4_long = r4;
352 			mp_digit r4l = (r4_long << 32);
353 			MP_ADD_CARRY(r0, r4_long,      r0, 0,     carry);
354 			MP_ADD_CARRY(r1, -r4l,         r1, carry, carry);
355 			MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry);
356 			MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry);
357 			r4 = carry;
358 		}
359 
360 		/* reduce the underflows */
		/* mirror image of the loop above for a negative r4 */
361 		while (r4 < 0) {
362 			mp_digit r4_long = -r4;
363 			mp_digit r4l = (r4_long << 32);
364 			MP_SUB_BORROW(r0, r4_long,      r0, 0,     carry);
365 			MP_SUB_BORROW(r1, -r4l,         r1, carry, carry);
366 			MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry);
367 			MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry);
368 			r4 = -carry;
369 		}
370 
		/* when r is a itself it already holds at least 4 digits
		 * (a has 256 or more bits here), so only a distinct r is
		 * padded */
371 		if (a != r) {
372 			MP_CHECKOK(s_mp_pad(r,4));
373 		}
374 		MP_SIGN(r) = MP_ZPOS;
375 		MP_USED(r) = 4;
376 
377 		MP_DIGIT(r,3) = r3;
378 		MP_DIGIT(r,2) = r2;
379 		MP_DIGIT(r,1) = r1;
380 		MP_DIGIT(r,0) = r0;
381 
382 		/* final reduction if necessary */
		/* The test is a digit-wise "r >= p256".  With 64-bit digits
		 * p256 reads, r3 first: 0xFFFFFFFF00000001, 0,
		 * 0x00000000FFFFFFFF, all-ones. */
383 		if ((r3 > 0xFFFFFFFF00000001ULL) ||
384 			((r3 == 0xFFFFFFFF00000001ULL) &&
385 			(r2 || (r1 >> 32)||
386 			       (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
387 			/* very rare, just use mp_sub */
388 			MP_CHECKOK(mp_sub(r, &meth->irr, r));
389 		}
390 
		/* USED was forced to 4 above; presumably this trims leading
		 * zero digits */
391 		s_mp_clamp(r);
392 #endif
393 	}
394 
	/* single exit point; presumably the jump target of MP_CHECKOK */
395   CLEANUP:
396 	return res;
397 }
398 
399 /* Compute the square of polynomial a, reduce modulo p256. Store the
400  * result in r.  r could be a.  Uses optimized modular reduction for p256.
401  */
402 mp_err
403 ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
404 {
405 	mp_err res = MP_OKAY;
406 
407 	MP_CHECKOK(mp_sqr(a, r));
408 	MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
409   CLEANUP:
410 	return res;
411 }
412 
413 /* Compute the product of two polynomials a and b, reduce modulo p256.
414  * Store the result in r.  r could be a or b; a could be b.  Uses
415  * optimized modular reduction for p256. */
416 mp_err
417 ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
418 					const GFMethod *meth)
419 {
420 	mp_err res = MP_OKAY;
421 
422 	MP_CHECKOK(mp_mul(a, b, r));
423 	MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
424   CLEANUP:
425 	return res;
426 }
427 
428 /* Wire in fast field arithmetic and precomputation of base point for
429  * named curves. */
430 mp_err
431 ec_group_set_gfp256(ECGroup *group, ECCurveName name)
432 {
433 	if (name == ECCurve_NIST_P256) {
434 		group->meth->field_mod = &ec_GFp_nistp256_mod;
435 		group->meth->field_mul = &ec_GFp_nistp256_mul;
436 		group->meth->field_sqr = &ec_GFp_nistp256_sqr;
437 	}
438 	return MP_OKAY;
439 }
440