1/*
2 * ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 *
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
9 *
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
14 *
15 * The Original Code is the elliptic curve math library.
16 *
17 * The Initial Developer of the Original Code is
18 * Sun Microsystems, Inc.
19 * Portions created by the Initial Developer are Copyright (C) 2003
20 * the Initial Developer. All Rights Reserved.
21 *
22 * Contributor(s):
23 *   Stephen Fung <fungstep@hotmail.com> and
24 *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
25 *
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
37 *
38 * ***** END LICENSE BLOCK ***** */
39/*
40 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
41 * Use is subject to license terms.
42 *
43 * Sun elects to use this software under the MPL license.
44 */
45
46#include "mpi.h"
47#include "mp_gf2m.h"
48#include "ecl-priv.h"
49#include "mpi-priv.h"
50#ifndef _KERNEL
51#include <stdlib.h>
52#endif
53
54/* Allocate memory for a new GFMethod object. */
55GFMethod *
56GFMethod_new(int kmflag)
57{
58	mp_err res = MP_OKAY;
59	GFMethod *meth;
60#ifdef _KERNEL
61	meth = (GFMethod *) kmem_alloc(sizeof(GFMethod), kmflag);
62#else
63	meth = (GFMethod *) malloc(sizeof(GFMethod));
64	if (meth == NULL)
65		return NULL;
66#endif
67	meth->constructed = MP_YES;
68	MP_DIGITS(&meth->irr) = 0;
69	meth->extra_free = NULL;
70	MP_CHECKOK(mp_init(&meth->irr, kmflag));
71
72  CLEANUP:
73	if (res != MP_OKAY) {
74		GFMethod_free(meth);
75		return NULL;
76	}
77	return meth;
78}
79
80/* Construct a generic GFMethod for arithmetic over prime fields with
81 * irreducible irr. */
82GFMethod *
83GFMethod_consGFp(const mp_int *irr)
84{
85	mp_err res = MP_OKAY;
86	GFMethod *meth = NULL;
87
88	meth = GFMethod_new(FLAG(irr));
89	if (meth == NULL)
90		return NULL;
91
92	MP_CHECKOK(mp_copy(irr, &meth->irr));
93	meth->irr_arr[0] = mpl_significant_bits(irr);
94	meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] =
95		meth->irr_arr[4] = 0;
96	switch(MP_USED(&meth->irr)) {
97	/* maybe we need 1 and 2 words here as well?*/
98	case 3:
99		meth->field_add = &ec_GFp_add_3;
100		meth->field_sub = &ec_GFp_sub_3;
101		break;
102	case 4:
103		meth->field_add = &ec_GFp_add_4;
104		meth->field_sub = &ec_GFp_sub_4;
105		break;
106	case 5:
107		meth->field_add = &ec_GFp_add_5;
108		meth->field_sub = &ec_GFp_sub_5;
109		break;
110	case 6:
111		meth->field_add = &ec_GFp_add_6;
112		meth->field_sub = &ec_GFp_sub_6;
113		break;
114	default:
115		meth->field_add = &ec_GFp_add;
116		meth->field_sub = &ec_GFp_sub;
117	}
118	meth->field_neg = &ec_GFp_neg;
119	meth->field_mod = &ec_GFp_mod;
120	meth->field_mul = &ec_GFp_mul;
121	meth->field_sqr = &ec_GFp_sqr;
122	meth->field_div = &ec_GFp_div;
123	meth->field_enc = NULL;
124	meth->field_dec = NULL;
125	meth->extra1 = NULL;
126	meth->extra2 = NULL;
127	meth->extra_free = NULL;
128
129  CLEANUP:
130	if (res != MP_OKAY) {
131		GFMethod_free(meth);
132		return NULL;
133	}
134	return meth;
135}
136
137/* Construct a generic GFMethod for arithmetic over binary polynomial
138 * fields with irreducible irr that has array representation irr_arr (see
139 * ecl-priv.h for description of the representation).  If irr_arr is NULL,
140 * then it is constructed from the bitstring representation. */
141GFMethod *
142GFMethod_consGF2m(const mp_int *irr, const unsigned int irr_arr[5])
143{
144	mp_err res = MP_OKAY;
145	int ret;
146	GFMethod *meth = NULL;
147
148	meth = GFMethod_new(FLAG(irr));
149	if (meth == NULL)
150		return NULL;
151
152	MP_CHECKOK(mp_copy(irr, &meth->irr));
153	if (irr_arr != NULL) {
154		/* Irreducible polynomials are either trinomials or pentanomials. */
155		meth->irr_arr[0] = irr_arr[0];
156		meth->irr_arr[1] = irr_arr[1];
157		meth->irr_arr[2] = irr_arr[2];
158		if (irr_arr[2] > 0) {
159			meth->irr_arr[3] = irr_arr[3];
160			meth->irr_arr[4] = irr_arr[4];
161		} else {
162			meth->irr_arr[3] = meth->irr_arr[4] = 0;
163		}
164	} else {
165		ret = mp_bpoly2arr(irr, meth->irr_arr, 5);
166		/* Irreducible polynomials are either trinomials or pentanomials. */
167		if ((ret != 5) && (ret != 3)) {
168			res = MP_UNDEF;
169			goto CLEANUP;
170		}
171	}
172	meth->field_add = &ec_GF2m_add;
173	meth->field_neg = &ec_GF2m_neg;
174	meth->field_sub = &ec_GF2m_add;
175	meth->field_mod = &ec_GF2m_mod;
176	meth->field_mul = &ec_GF2m_mul;
177	meth->field_sqr = &ec_GF2m_sqr;
178	meth->field_div = &ec_GF2m_div;
179	meth->field_enc = NULL;
180	meth->field_dec = NULL;
181	meth->extra1 = NULL;
182	meth->extra2 = NULL;
183	meth->extra_free = NULL;
184
185  CLEANUP:
186	if (res != MP_OKAY) {
187		GFMethod_free(meth);
188		return NULL;
189	}
190	return meth;
191}
192
193/* Free the memory allocated (if any) to a GFMethod object. */
194void
195GFMethod_free(GFMethod *meth)
196{
197	if (meth == NULL)
198		return;
199	if (meth->constructed == MP_NO)
200		return;
201	mp_clear(&meth->irr);
202	if (meth->extra_free != NULL)
203		meth->extra_free(meth);
204#ifdef _KERNEL
205	kmem_free(meth, sizeof(GFMethod));
206#else
207	free(meth);
208#endif
209}
210
211/* Wrapper functions for generic prime field arithmetic. */
212
213/* Add two field elements.  Assumes that 0 <= a, b < meth->irr */
214mp_err
215ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
216		   const GFMethod *meth)
217{
218	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */
219	mp_err res;
220
221	if ((res = mp_add(a, b, r)) != MP_OKAY) {
222		return res;
223	}
224	if (mp_cmp(r, &meth->irr) >= 0) {
225		return mp_sub(r, &meth->irr, r);
226	}
227	return res;
228}
229
230/* Negates a field element.  Assumes that 0 <= a < meth->irr */
231mp_err
232ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
233{
234	/* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */
235
236	if (mp_cmp_z(a) == 0) {
237		mp_zero(r);
238		return MP_OKAY;
239	}
240	return mp_sub(&meth->irr, a, r);
241}
242
243/* Subtracts two field elements.  Assumes that 0 <= a, b < meth->irr */
244mp_err
245ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
246		   const GFMethod *meth)
247{
248	mp_err res = MP_OKAY;
249
250	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */
251	res = mp_sub(a, b, r);
252	if (res == MP_RANGE) {
253		MP_CHECKOK(mp_sub(b, a, r));
254		if (mp_cmp_z(r) < 0) {
255			MP_CHECKOK(mp_add(r, &meth->irr, r));
256		}
257		MP_CHECKOK(ec_GFp_neg(r, r, meth));
258	}
259	if (mp_cmp_z(r) < 0) {
260		MP_CHECKOK(mp_add(r, &meth->irr, r));
261	}
262  CLEANUP:
263	return res;
264}
265/*
266 * Inline adds for small curve lengths.
267 */
268/* 3 words */
269mp_err
270ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
271			const GFMethod *meth)
272{
273	mp_err res = MP_OKAY;
274	mp_digit a0 = 0, a1 = 0, a2 = 0;
275	mp_digit r0 = 0, r1 = 0, r2 = 0;
276	mp_digit carry;
277
278	switch(MP_USED(a)) {
279	case 3:
280		a2 = MP_DIGIT(a,2);
281		/* FALLTHROUGH */
282	case 2:
283		a1 = MP_DIGIT(a,1);
284		/* FALLTHROUGH */
285	case 1:
286		a0 = MP_DIGIT(a,0);
287	}
288	switch(MP_USED(b)) {
289	case 3:
290		r2 = MP_DIGIT(b,2);
291		/* FALLTHROUGH */
292	case 2:
293		r1 = MP_DIGIT(b,1);
294		/* FALLTHROUGH */
295	case 1:
296		r0 = MP_DIGIT(b,0);
297	}
298
299#ifndef MPI_AMD64_ADD
300	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
301	MP_ADD_CARRY(a1, r1, r1, carry, carry);
302	MP_ADD_CARRY(a2, r2, r2, carry, carry);
303#else
304	__asm__ (
305                "xorq   %3,%3           \n\t"
306                "addq   %4,%0           \n\t"
307                "adcq   %5,%1           \n\t"
308                "adcq   %6,%2           \n\t"
309                "adcq   $0,%3           \n\t"
310                : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
311                : "r" (a0), "r" (a1), "r" (a2),
312		  "0" (r0), "1" (r1), "2" (r2)
313                : "%cc" );
314#endif
315
316	MP_CHECKOK(s_mp_pad(r, 3));
317	MP_DIGIT(r, 2) = r2;
318	MP_DIGIT(r, 1) = r1;
319	MP_DIGIT(r, 0) = r0;
320	MP_SIGN(r) = MP_ZPOS;
321	MP_USED(r) = 3;
322
323	/* Do quick 'subract' if we've gone over
324	 * (add the 2's complement of the curve field) */
325	 a2 = MP_DIGIT(&meth->irr,2);
326	if (carry ||  r2 >  a2 ||
327		((r2 == a2) && mp_cmp(r,&meth->irr) != MP_LT)) {
328		a1 = MP_DIGIT(&meth->irr,1);
329		a0 = MP_DIGIT(&meth->irr,0);
330#ifndef MPI_AMD64_ADD
331		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
332		MP_SUB_BORROW(r1, a1, r1, carry, carry);
333		MP_SUB_BORROW(r2, a2, r2, carry, carry);
334#else
335		__asm__ (
336			"subq   %3,%0           \n\t"
337			"sbbq   %4,%1           \n\t"
338			"sbbq   %5,%2           \n\t"
339			: "=r"(r0), "=r"(r1), "=r"(r2)
340			: "r" (a0), "r" (a1), "r" (a2),
341			  "0" (r0), "1" (r1), "2" (r2)
342			: "%cc" );
343#endif
344		MP_DIGIT(r, 2) = r2;
345		MP_DIGIT(r, 1) = r1;
346		MP_DIGIT(r, 0) = r0;
347	}
348
349	s_mp_clamp(r);
350
351  CLEANUP:
352	return res;
353}
354
355/* 4 words */
356mp_err
357ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
358			const GFMethod *meth)
359{
360	mp_err res = MP_OKAY;
361	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
362	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
363	mp_digit carry;
364
365	switch(MP_USED(a)) {
366	case 4:
367		a3 = MP_DIGIT(a,3);
368		/* FALLTHROUGH */
369	case 3:
370		a2 = MP_DIGIT(a,2);
371		/* FALLTHROUGH */
372	case 2:
373		a1 = MP_DIGIT(a,1);
374		/* FALLTHROUGH */
375	case 1:
376		a0 = MP_DIGIT(a,0);
377	}
378	switch(MP_USED(b)) {
379	case 4:
380		r3 = MP_DIGIT(b,3);
381		/* FALLTHROUGH */
382	case 3:
383		r2 = MP_DIGIT(b,2);
384		/* FALLTHROUGH */
385	case 2:
386		r1 = MP_DIGIT(b,1);
387		/* FALLTHROUGH */
388	case 1:
389		r0 = MP_DIGIT(b,0);
390	}
391
392#ifndef MPI_AMD64_ADD
393	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
394	MP_ADD_CARRY(a1, r1, r1, carry, carry);
395	MP_ADD_CARRY(a2, r2, r2, carry, carry);
396	MP_ADD_CARRY(a3, r3, r3, carry, carry);
397#else
398	__asm__ (
399                "xorq   %4,%4           \n\t"
400                "addq   %5,%0           \n\t"
401                "adcq   %6,%1           \n\t"
402                "adcq   %7,%2           \n\t"
403                "adcq   %8,%3           \n\t"
404                "adcq   $0,%4           \n\t"
405                : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
406                : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
407		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
408                : "%cc" );
409#endif
410
411	MP_CHECKOK(s_mp_pad(r, 4));
412	MP_DIGIT(r, 3) = r3;
413	MP_DIGIT(r, 2) = r2;
414	MP_DIGIT(r, 1) = r1;
415	MP_DIGIT(r, 0) = r0;
416	MP_SIGN(r) = MP_ZPOS;
417	MP_USED(r) = 4;
418
419	/* Do quick 'subract' if we've gone over
420	 * (add the 2's complement of the curve field) */
421	 a3 = MP_DIGIT(&meth->irr,3);
422	if (carry ||  r3 >  a3 ||
423		((r3 == a3) && mp_cmp(r,&meth->irr) != MP_LT)) {
424		a2 = MP_DIGIT(&meth->irr,2);
425		a1 = MP_DIGIT(&meth->irr,1);
426		a0 = MP_DIGIT(&meth->irr,0);
427#ifndef MPI_AMD64_ADD
428		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
429		MP_SUB_BORROW(r1, a1, r1, carry, carry);
430		MP_SUB_BORROW(r2, a2, r2, carry, carry);
431		MP_SUB_BORROW(r3, a3, r3, carry, carry);
432#else
433		__asm__ (
434			"subq   %4,%0           \n\t"
435			"sbbq   %5,%1           \n\t"
436			"sbbq   %6,%2           \n\t"
437			"sbbq   %7,%3           \n\t"
438			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
439			: "r" (a0), "r" (a1), "r" (a2), "r" (a3),
440			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
441			: "%cc" );
442#endif
443		MP_DIGIT(r, 3) = r3;
444		MP_DIGIT(r, 2) = r2;
445		MP_DIGIT(r, 1) = r1;
446		MP_DIGIT(r, 0) = r0;
447	}
448
449	s_mp_clamp(r);
450
451  CLEANUP:
452	return res;
453}
454
455/* 5 words */
456mp_err
457ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
458			const GFMethod *meth)
459{
460	mp_err res = MP_OKAY;
461	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
462	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
463	mp_digit carry;
464
465	switch(MP_USED(a)) {
466	case 5:
467		a4 = MP_DIGIT(a,4);
468		/* FALLTHROUGH */
469	case 4:
470		a3 = MP_DIGIT(a,3);
471		/* FALLTHROUGH */
472	case 3:
473		a2 = MP_DIGIT(a,2);
474		/* FALLTHROUGH */
475	case 2:
476		a1 = MP_DIGIT(a,1);
477		/* FALLTHROUGH */
478	case 1:
479		a0 = MP_DIGIT(a,0);
480	}
481	switch(MP_USED(b)) {
482	case 5:
483		r4 = MP_DIGIT(b,4);
484		/* FALLTHROUGH */
485	case 4:
486		r3 = MP_DIGIT(b,3);
487		/* FALLTHROUGH */
488	case 3:
489		r2 = MP_DIGIT(b,2);
490		/* FALLTHROUGH */
491	case 2:
492		r1 = MP_DIGIT(b,1);
493		/* FALLTHROUGH */
494	case 1:
495		r0 = MP_DIGIT(b,0);
496	}
497
498	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
499	MP_ADD_CARRY(a1, r1, r1, carry, carry);
500	MP_ADD_CARRY(a2, r2, r2, carry, carry);
501	MP_ADD_CARRY(a3, r3, r3, carry, carry);
502	MP_ADD_CARRY(a4, r4, r4, carry, carry);
503
504	MP_CHECKOK(s_mp_pad(r, 5));
505	MP_DIGIT(r, 4) = r4;
506	MP_DIGIT(r, 3) = r3;
507	MP_DIGIT(r, 2) = r2;
508	MP_DIGIT(r, 1) = r1;
509	MP_DIGIT(r, 0) = r0;
510	MP_SIGN(r) = MP_ZPOS;
511	MP_USED(r) = 5;
512
513	/* Do quick 'subract' if we've gone over
514	 * (add the 2's complement of the curve field) */
515	 a4 = MP_DIGIT(&meth->irr,4);
516	if (carry ||  r4 >  a4 ||
517		((r4 == a4) && mp_cmp(r,&meth->irr) != MP_LT)) {
518		a3 = MP_DIGIT(&meth->irr,3);
519		a2 = MP_DIGIT(&meth->irr,2);
520		a1 = MP_DIGIT(&meth->irr,1);
521		a0 = MP_DIGIT(&meth->irr,0);
522		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
523		MP_SUB_BORROW(r1, a1, r1, carry, carry);
524		MP_SUB_BORROW(r2, a2, r2, carry, carry);
525		MP_SUB_BORROW(r3, a3, r3, carry, carry);
526		MP_SUB_BORROW(r4, a4, r4, carry, carry);
527		MP_DIGIT(r, 4) = r4;
528		MP_DIGIT(r, 3) = r3;
529		MP_DIGIT(r, 2) = r2;
530		MP_DIGIT(r, 1) = r1;
531		MP_DIGIT(r, 0) = r0;
532	}
533
534	s_mp_clamp(r);
535
536  CLEANUP:
537	return res;
538}
539
540/* 6 words */
541mp_err
542ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
543			const GFMethod *meth)
544{
545	mp_err res = MP_OKAY;
546	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
547	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
548	mp_digit carry;
549
550	switch(MP_USED(a)) {
551	case 6:
552		a5 = MP_DIGIT(a,5);
553		/* FALLTHROUGH */
554	case 5:
555		a4 = MP_DIGIT(a,4);
556		/* FALLTHROUGH */
557	case 4:
558		a3 = MP_DIGIT(a,3);
559		/* FALLTHROUGH */
560	case 3:
561		a2 = MP_DIGIT(a,2);
562		/* FALLTHROUGH */
563	case 2:
564		a1 = MP_DIGIT(a,1);
565		/* FALLTHROUGH */
566	case 1:
567		a0 = MP_DIGIT(a,0);
568	}
569	switch(MP_USED(b)) {
570	case 6:
571		r5 = MP_DIGIT(b,5);
572		/* FALLTHROUGH */
573	case 5:
574		r4 = MP_DIGIT(b,4);
575		/* FALLTHROUGH */
576	case 4:
577		r3 = MP_DIGIT(b,3);
578		/* FALLTHROUGH */
579	case 3:
580		r2 = MP_DIGIT(b,2);
581		/* FALLTHROUGH */
582	case 2:
583		r1 = MP_DIGIT(b,1);
584		/* FALLTHROUGH */
585	case 1:
586		r0 = MP_DIGIT(b,0);
587	}
588
589	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
590	MP_ADD_CARRY(a1, r1, r1, carry, carry);
591	MP_ADD_CARRY(a2, r2, r2, carry, carry);
592	MP_ADD_CARRY(a3, r3, r3, carry, carry);
593	MP_ADD_CARRY(a4, r4, r4, carry, carry);
594	MP_ADD_CARRY(a5, r5, r5, carry, carry);
595
596	MP_CHECKOK(s_mp_pad(r, 6));
597	MP_DIGIT(r, 5) = r5;
598	MP_DIGIT(r, 4) = r4;
599	MP_DIGIT(r, 3) = r3;
600	MP_DIGIT(r, 2) = r2;
601	MP_DIGIT(r, 1) = r1;
602	MP_DIGIT(r, 0) = r0;
603	MP_SIGN(r) = MP_ZPOS;
604	MP_USED(r) = 6;
605
606	/* Do quick 'subract' if we've gone over
607	 * (add the 2's complement of the curve field) */
608	a5 = MP_DIGIT(&meth->irr,5);
609	if (carry ||  r5 >  a5 ||
610		((r5 == a5) && mp_cmp(r,&meth->irr) != MP_LT)) {
611		a4 = MP_DIGIT(&meth->irr,4);
612		a3 = MP_DIGIT(&meth->irr,3);
613		a2 = MP_DIGIT(&meth->irr,2);
614		a1 = MP_DIGIT(&meth->irr,1);
615		a0 = MP_DIGIT(&meth->irr,0);
616		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
617		MP_SUB_BORROW(r1, a1, r1, carry, carry);
618		MP_SUB_BORROW(r2, a2, r2, carry, carry);
619		MP_SUB_BORROW(r3, a3, r3, carry, carry);
620		MP_SUB_BORROW(r4, a4, r4, carry, carry);
621		MP_SUB_BORROW(r5, a5, r5, carry, carry);
622		MP_DIGIT(r, 5) = r5;
623		MP_DIGIT(r, 4) = r4;
624		MP_DIGIT(r, 3) = r3;
625		MP_DIGIT(r, 2) = r2;
626		MP_DIGIT(r, 1) = r1;
627		MP_DIGIT(r, 0) = r0;
628	}
629
630	s_mp_clamp(r);
631
632  CLEANUP:
633	return res;
634}
635
636/*
637 * The following subraction functions do in-line subractions based
638 * on our curve size.
639 *
640 * ... 3 words
641 */
642mp_err
643ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
644			const GFMethod *meth)
645{
646	mp_err res = MP_OKAY;
647	mp_digit b0 = 0, b1 = 0, b2 = 0;
648	mp_digit r0 = 0, r1 = 0, r2 = 0;
649	mp_digit borrow;
650
651	switch(MP_USED(a)) {
652	case 3:
653		r2 = MP_DIGIT(a,2);
654		/* FALLTHROUGH */
655	case 2:
656		r1 = MP_DIGIT(a,1);
657		/* FALLTHROUGH */
658	case 1:
659		r0 = MP_DIGIT(a,0);
660	}
661	switch(MP_USED(b)) {
662	case 3:
663		b2 = MP_DIGIT(b,2);
664		/* FALLTHROUGH */
665	case 2:
666		b1 = MP_DIGIT(b,1);
667		/* FALLTHROUGH */
668	case 1:
669		b0 = MP_DIGIT(b,0);
670	}
671
672#ifndef MPI_AMD64_ADD
673	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
674	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
675	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
676#else
677	__asm__ (
678                "xorq   %3,%3           \n\t"
679                "subq   %4,%0           \n\t"
680                "sbbq   %5,%1           \n\t"
681                "sbbq   %6,%2           \n\t"
682                "adcq   $0,%3           \n\t"
683                : "=r"(r0), "=r"(r1), "=r"(r2), "=r" (borrow)
684                : "r" (b0), "r" (b1), "r" (b2),
685		  "0" (r0), "1" (r1), "2" (r2)
686                : "%cc" );
687#endif
688
689	/* Do quick 'add' if we've gone under 0
690	 * (subtract the 2's complement of the curve field) */
691	if (borrow) {
692	 	b2 = MP_DIGIT(&meth->irr,2);
693		b1 = MP_DIGIT(&meth->irr,1);
694		b0 = MP_DIGIT(&meth->irr,0);
695#ifndef MPI_AMD64_ADD
696		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
697		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
698		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
699#else
700		__asm__ (
701			"addq   %3,%0           \n\t"
702			"adcq   %4,%1           \n\t"
703			"adcq   %5,%2           \n\t"
704			: "=r"(r0), "=r"(r1), "=r"(r2)
705			: "r" (b0), "r" (b1), "r" (b2),
706  			  "0" (r0), "1" (r1), "2" (r2)
707			: "%cc" );
708#endif
709	}
710
711#ifdef MPI_AMD64_ADD
712	/* compiler fakeout? */
713	if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
714		MP_CHECKOK(s_mp_pad(r, 4));
715	}
716#endif
717	MP_CHECKOK(s_mp_pad(r, 3));
718	MP_DIGIT(r, 2) = r2;
719	MP_DIGIT(r, 1) = r1;
720	MP_DIGIT(r, 0) = r0;
721	MP_SIGN(r) = MP_ZPOS;
722	MP_USED(r) = 3;
723	s_mp_clamp(r);
724
725  CLEANUP:
726	return res;
727}
728
729/* 4 words */
730mp_err
731ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
732			const GFMethod *meth)
733{
734	mp_err res = MP_OKAY;
735	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
736	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
737	mp_digit borrow;
738
739	switch(MP_USED(a)) {
740	case 4:
741		r3 = MP_DIGIT(a,3);
742		/* FALLTHROUGH */
743	case 3:
744		r2 = MP_DIGIT(a,2);
745		/* FALLTHROUGH */
746	case 2:
747		r1 = MP_DIGIT(a,1);
748		/* FALLTHROUGH */
749	case 1:
750		r0 = MP_DIGIT(a,0);
751	}
752	switch(MP_USED(b)) {
753	case 4:
754		b3 = MP_DIGIT(b,3);
755		/* FALLTHROUGH */
756	case 3:
757		b2 = MP_DIGIT(b,2);
758		/* FALLTHROUGH */
759	case 2:
760		b1 = MP_DIGIT(b,1);
761		/* FALLTHROUGH */
762	case 1:
763		b0 = MP_DIGIT(b,0);
764	}
765
766#ifndef MPI_AMD64_ADD
767	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
768	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
769	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
770	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
771#else
772	__asm__ (
773                "xorq   %4,%4           \n\t"
774                "subq   %5,%0           \n\t"
775                "sbbq   %6,%1           \n\t"
776                "sbbq   %7,%2           \n\t"
777                "sbbq   %8,%3           \n\t"
778                "adcq   $0,%4           \n\t"
779                : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r" (borrow)
780                : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
781		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
782                : "%cc" );
783#endif
784
785	/* Do quick 'add' if we've gone under 0
786	 * (subtract the 2's complement of the curve field) */
787	if (borrow) {
788	 	b3 = MP_DIGIT(&meth->irr,3);
789	 	b2 = MP_DIGIT(&meth->irr,2);
790		b1 = MP_DIGIT(&meth->irr,1);
791		b0 = MP_DIGIT(&meth->irr,0);
792#ifndef MPI_AMD64_ADD
793		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
794		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
795		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
796		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
797#else
798		__asm__ (
799			"addq   %4,%0           \n\t"
800			"adcq   %5,%1           \n\t"
801			"adcq   %6,%2           \n\t"
802			"adcq   %7,%3           \n\t"
803			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
804			: "r" (b0), "r" (b1), "r" (b2), "r" (b3),
805  			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
806			: "%cc" );
807#endif
808	}
809#ifdef MPI_AMD64_ADD
810	/* compiler fakeout? */
811	if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
812		MP_CHECKOK(s_mp_pad(r, 4));
813	}
814#endif
815	MP_CHECKOK(s_mp_pad(r, 4));
816	MP_DIGIT(r, 3) = r3;
817	MP_DIGIT(r, 2) = r2;
818	MP_DIGIT(r, 1) = r1;
819	MP_DIGIT(r, 0) = r0;
820	MP_SIGN(r) = MP_ZPOS;
821	MP_USED(r) = 4;
822	s_mp_clamp(r);
823
824  CLEANUP:
825	return res;
826}
827
828/* 5 words */
829mp_err
830ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
831			const GFMethod *meth)
832{
833	mp_err res = MP_OKAY;
834	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
835	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
836	mp_digit borrow;
837
838	switch(MP_USED(a)) {
839	case 5:
840		r4 = MP_DIGIT(a,4);
841		/* FALLTHROUGH */
842	case 4:
843		r3 = MP_DIGIT(a,3);
844		/* FALLTHROUGH */
845	case 3:
846		r2 = MP_DIGIT(a,2);
847		/* FALLTHROUGH */
848	case 2:
849		r1 = MP_DIGIT(a,1);
850		/* FALLTHROUGH */
851	case 1:
852		r0 = MP_DIGIT(a,0);
853	}
854	switch(MP_USED(b)) {
855	case 5:
856		b4 = MP_DIGIT(b,4);
857		/* FALLTHROUGH */
858	case 4:
859		b3 = MP_DIGIT(b,3);
860		/* FALLTHROUGH */
861	case 3:
862		b2 = MP_DIGIT(b,2);
863		/* FALLTHROUGH */
864	case 2:
865		b1 = MP_DIGIT(b,1);
866		/* FALLTHROUGH */
867	case 1:
868		b0 = MP_DIGIT(b,0);
869	}
870
871	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
872	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
873	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
874	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
875	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
876
877	/* Do quick 'add' if we've gone under 0
878	 * (subtract the 2's complement of the curve field) */
879	if (borrow) {
880	 	b4 = MP_DIGIT(&meth->irr,4);
881	 	b3 = MP_DIGIT(&meth->irr,3);
882	 	b2 = MP_DIGIT(&meth->irr,2);
883		b1 = MP_DIGIT(&meth->irr,1);
884		b0 = MP_DIGIT(&meth->irr,0);
885		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
886		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
887		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
888		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
889	}
890	MP_CHECKOK(s_mp_pad(r, 5));
891	MP_DIGIT(r, 4) = r4;
892	MP_DIGIT(r, 3) = r3;
893	MP_DIGIT(r, 2) = r2;
894	MP_DIGIT(r, 1) = r1;
895	MP_DIGIT(r, 0) = r0;
896	MP_SIGN(r) = MP_ZPOS;
897	MP_USED(r) = 5;
898	s_mp_clamp(r);
899
900  CLEANUP:
901	return res;
902}
903
904/* 6 words */
905mp_err
906ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
907			const GFMethod *meth)
908{
909	mp_err res = MP_OKAY;
910	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
911	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
912	mp_digit borrow;
913
914	switch(MP_USED(a)) {
915	case 6:
916		r5 = MP_DIGIT(a,5);
917		/* FALLTHROUGH */
918	case 5:
919		r4 = MP_DIGIT(a,4);
920		/* FALLTHROUGH */
921	case 4:
922		r3 = MP_DIGIT(a,3);
923		/* FALLTHROUGH */
924	case 3:
925		r2 = MP_DIGIT(a,2);
926		/* FALLTHROUGH */
927	case 2:
928		r1 = MP_DIGIT(a,1);
929		/* FALLTHROUGH */
930	case 1:
931		r0 = MP_DIGIT(a,0);
932	}
933	switch(MP_USED(b)) {
934	case 6:
935		b5 = MP_DIGIT(b,5);
936		/* FALLTHROUGH */
937	case 5:
938		b4 = MP_DIGIT(b,4);
939		/* FALLTHROUGH */
940	case 4:
941		b3 = MP_DIGIT(b,3);
942		/* FALLTHROUGH */
943	case 3:
944		b2 = MP_DIGIT(b,2);
945		/* FALLTHROUGH */
946	case 2:
947		b1 = MP_DIGIT(b,1);
948		/* FALLTHROUGH */
949	case 1:
950		b0 = MP_DIGIT(b,0);
951	}
952
953	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
954	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
955	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
956	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
957	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
958	MP_SUB_BORROW(r5, b5, r5, borrow, borrow);
959
960	/* Do quick 'add' if we've gone under 0
961	 * (subtract the 2's complement of the curve field) */
962	if (borrow) {
963	 	b5 = MP_DIGIT(&meth->irr,5);
964	 	b4 = MP_DIGIT(&meth->irr,4);
965	 	b3 = MP_DIGIT(&meth->irr,3);
966	 	b2 = MP_DIGIT(&meth->irr,2);
967		b1 = MP_DIGIT(&meth->irr,1);
968		b0 = MP_DIGIT(&meth->irr,0);
969		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
970		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
971		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
972		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
973		MP_ADD_CARRY(b4, r4, r4, borrow, borrow);
974	}
975
976	MP_CHECKOK(s_mp_pad(r, 6));
977	MP_DIGIT(r, 5) = r5;
978	MP_DIGIT(r, 4) = r4;
979	MP_DIGIT(r, 3) = r3;
980	MP_DIGIT(r, 2) = r2;
981	MP_DIGIT(r, 1) = r1;
982	MP_DIGIT(r, 0) = r0;
983	MP_SIGN(r) = MP_ZPOS;
984	MP_USED(r) = 6;
985	s_mp_clamp(r);
986
987  CLEANUP:
988	return res;
989}
990
991
992/* Reduces an integer to a field element. */
993mp_err
994ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
995{
996	return mp_mod(a, &meth->irr, r);
997}
998
999/* Multiplies two field elements. */
1000mp_err
1001ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
1002		   const GFMethod *meth)
1003{
1004	return mp_mulmod(a, b, &meth->irr, r);
1005}
1006
1007/* Squares a field element. */
1008mp_err
1009ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
1010{
1011	return mp_sqrmod(a, &meth->irr, r);
1012}
1013
1014/* Divides two field elements. If a is NULL, then returns the inverse of
1015 * b. */
1016mp_err
1017ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
1018		   const GFMethod *meth)
1019{
1020	mp_err res = MP_OKAY;
1021	mp_int t;
1022
1023	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
1024	if (a == NULL) {
1025		return mp_invmod(b, &meth->irr, r);
1026	} else {
1027		/* MPI doesn't support divmod, so we implement it using invmod and
1028		 * mulmod. */
1029		MP_CHECKOK(mp_init(&t, FLAG(b)));
1030		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
1031		MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
1032	  CLEANUP:
1033		mp_clear(&t);
1034		return res;
1035	}
1036}
1037
1038/* Wrapper functions for generic binary polynomial field arithmetic. */
1039
1040/* Adds two field elements. */
1041mp_err
1042ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
1043			const GFMethod *meth)
1044{
1045	return mp_badd(a, b, r);
1046}
1047
1048/* Negates a field element. Note that for binary polynomial fields, the
1049 * negation of a field element is the field element itself. */
1050mp_err
1051ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
1052{
1053	if (a == r) {
1054		return MP_OKAY;
1055	} else {
1056		return mp_copy(a, r);
1057	}
1058}
1059
1060/* Reduces a binary polynomial to a field element. */
1061mp_err
1062ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
1063{
1064	return mp_bmod(a, meth->irr_arr, r);
1065}
1066
1067/* Multiplies two field elements. */
1068mp_err
1069ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
1070			const GFMethod *meth)
1071{
1072	return mp_bmulmod(a, b, meth->irr_arr, r);
1073}
1074
1075/* Squares a field element. */
1076mp_err
1077ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
1078{
1079	return mp_bsqrmod(a, meth->irr_arr, r);
1080}
1081
1082/* Divides two field elements. If a is NULL, then returns the inverse of
1083 * b. */
1084mp_err
1085ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
1086			const GFMethod *meth)
1087{
1088	mp_err res = MP_OKAY;
1089	mp_int t;
1090
1091	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
1092	if (a == NULL) {
1093		/* The GF(2^m) portion of MPI doesn't support invmod, so we
1094		 * compute 1/b. */
1095		MP_CHECKOK(mp_init(&t, FLAG(b)));
1096		MP_CHECKOK(mp_set_int(&t, 1));
1097		MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
1098	  CLEANUP:
1099		mp_clear(&t);
1100		return res;
1101	} else {
1102		return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
1103	}
1104}
1105