/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */
8
9#include <chacha.h>
10#include <stddef.h>
11
/* Short integer aliases used by the cipher code below. */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned int u_int;

/* Append an unsigned suffix to an integer literal. */
#define U8C(lit) (lit##U)
#define U32C(lit) (lit##U)

/* Keep only the low 8 (resp. low 32) bits of a value. */
#define U8V(val) ((u8)(val) & U8C(0xFF))
#define U32V(val) ((u32)(val) & U32C(0xFFFFFFFF))

/* Rotate a 32-bit word left by `count` bits; `count` must lie in 1..31. */
#define ROTL32(word, count) \
  (U32V((word) << (count)) | ((word) >> (32 - (count))))

/* Assemble a 32-bit word from four bytes stored least-significant first. */
#define U8TO32_LITTLE(src) \
  (((u32)((src)[3]) << 24) | \
   ((u32)((src)[2]) << 16) | \
   ((u32)((src)[1]) <<  8) | \
   ((u32)((src)[0])      ))

/* Store a 32-bit word as four bytes, least-significant byte first. */
#define U32TO8_LITTLE(dst, word) \
  do { \
    (dst)[0] = U8V((word)      ); \
    (dst)[1] = U8V((word) >>  8); \
    (dst)[2] = U8V((word) >> 16); \
    (dst)[3] = U8V((word) >> 24); \
  } while (0)

/* Word-level primitives; PLUS wraps modulo 2^32. */
#define ROTATE(word, count) (ROTL32(word, count))
#define XOR(lhs, rhs) ((lhs) ^ (rhs))
#define PLUS(lhs, rhs) (U32V((lhs) + (rhs)))
#define PLUSONE(word) (PLUS((word), 1))

/*
 * One add/xor/rotate quarter round over four state words, updated in place.
 * The expansion is a plain statement sequence that already ends in ';', so
 * it is invoked without a trailing semicolon and every argument must be an
 * assignable variable (each one is evaluated several times).
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); \
  d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); \
  b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); \
  d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); \
  b = ROTATE(XOR(b,c), 7);

/*
 * Constants loaded into state words 0..3: sigma for 256-bit keys, tau for
 * 128-bit keys.  Each literal is exactly 16 characters, so the arrays hold
 * no terminating NUL.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
52
53void
54chacha_keysetup(chacha_ctx_t *x,const u8 *k,u32 kbits,u32 ivbits)
55{
56  const char *constants;
57
58  x->chacha_input[4] = U8TO32_LITTLE(k + 0);
59  x->chacha_input[5] = U8TO32_LITTLE(k + 4);
60  x->chacha_input[6] = U8TO32_LITTLE(k + 8);
61  x->chacha_input[7] = U8TO32_LITTLE(k + 12);
62  if (kbits == 256) { /* recommended */
63    k += 16;
64    constants = sigma;
65  } else { /* kbits == 128 */
66    constants = tau;
67  }
68  x->chacha_input[8] = U8TO32_LITTLE(k + 0);
69  x->chacha_input[9] = U8TO32_LITTLE(k + 4);
70  x->chacha_input[10] = U8TO32_LITTLE(k + 8);
71  x->chacha_input[11] = U8TO32_LITTLE(k + 12);
72  x->chacha_input[0] = U8TO32_LITTLE(constants + 0);
73  x->chacha_input[1] = U8TO32_LITTLE(constants + 4);
74  x->chacha_input[2] = U8TO32_LITTLE(constants + 8);
75  x->chacha_input[3] = U8TO32_LITTLE(constants + 12);
76}
77
78void
79chacha_ivsetup(chacha_ctx_t *x,const u8 *iv)
80{
81  x->chacha_input[12] = 0;
82  x->chacha_input[13] = 0;
83  x->chacha_input[14] = U8TO32_LITTLE(iv + 0);
84  x->chacha_input[15] = U8TO32_LITTLE(iv + 4);
85}
86
87void
88chacha_encrypt_bytes(chacha_ctx_t *x,const u8 *m,u8 *c,u32 bytes)
89{
90  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
91  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
92  u8 *ctarget = NULL;
93  u8 tmp[64];
94  u_int i;
95
96  if (!bytes) return;
97
98  j0 = x->chacha_input[0];
99  j1 = x->chacha_input[1];
100  j2 = x->chacha_input[2];
101  j3 = x->chacha_input[3];
102  j4 = x->chacha_input[4];
103  j5 = x->chacha_input[5];
104  j6 = x->chacha_input[6];
105  j7 = x->chacha_input[7];
106  j8 = x->chacha_input[8];
107  j9 = x->chacha_input[9];
108  j10 = x->chacha_input[10];
109  j11 = x->chacha_input[11];
110  j12 = x->chacha_input[12];
111  j13 = x->chacha_input[13];
112  j14 = x->chacha_input[14];
113  j15 = x->chacha_input[15];
114
115  for (;;) {
116    if (bytes < 64) {
117      for (i = 0;i < bytes;++i) tmp[i] = m[i];
118      m = tmp;
119      ctarget = c;
120      c = tmp;
121    }
122    x0 = j0;
123    x1 = j1;
124    x2 = j2;
125    x3 = j3;
126    x4 = j4;
127    x5 = j5;
128    x6 = j6;
129    x7 = j7;
130    x8 = j8;
131    x9 = j9;
132    x10 = j10;
133    x11 = j11;
134    x12 = j12;
135    x13 = j13;
136    x14 = j14;
137    x15 = j15;
138    for (i = 20;i > 0;i -= 2) {
139      QUARTERROUND( x0, x4, x8,x12)
140      QUARTERROUND( x1, x5, x9,x13)
141      QUARTERROUND( x2, x6,x10,x14)
142      QUARTERROUND( x3, x7,x11,x15)
143      QUARTERROUND( x0, x5,x10,x15)
144      QUARTERROUND( x1, x6,x11,x12)
145      QUARTERROUND( x2, x7, x8,x13)
146      QUARTERROUND( x3, x4, x9,x14)
147    }
148    x0 = PLUS(x0,j0);
149    x1 = PLUS(x1,j1);
150    x2 = PLUS(x2,j2);
151    x3 = PLUS(x3,j3);
152    x4 = PLUS(x4,j4);
153    x5 = PLUS(x5,j5);
154    x6 = PLUS(x6,j6);
155    x7 = PLUS(x7,j7);
156    x8 = PLUS(x8,j8);
157    x9 = PLUS(x9,j9);
158    x10 = PLUS(x10,j10);
159    x11 = PLUS(x11,j11);
160    x12 = PLUS(x12,j12);
161    x13 = PLUS(x13,j13);
162    x14 = PLUS(x14,j14);
163    x15 = PLUS(x15,j15);
164
165#ifndef KEYSTREAM_ONLY
166    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
167    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
168    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
169    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
170    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
171    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
172    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
173    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
174    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
175    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
176    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
177    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
178    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
179    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
180    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
181    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
182#endif
183
184    j12 = PLUSONE(j12);
185    if (!j12) {
186      j13 = PLUSONE(j13);
187      /* stopping at 2^70 bytes per nonce is user's responsibility */
188    }
189
190    U32TO8_LITTLE(c + 0,x0);
191    U32TO8_LITTLE(c + 4,x1);
192    U32TO8_LITTLE(c + 8,x2);
193    U32TO8_LITTLE(c + 12,x3);
194    U32TO8_LITTLE(c + 16,x4);
195    U32TO8_LITTLE(c + 20,x5);
196    U32TO8_LITTLE(c + 24,x6);
197    U32TO8_LITTLE(c + 28,x7);
198    U32TO8_LITTLE(c + 32,x8);
199    U32TO8_LITTLE(c + 36,x9);
200    U32TO8_LITTLE(c + 40,x10);
201    U32TO8_LITTLE(c + 44,x11);
202    U32TO8_LITTLE(c + 48,x12);
203    U32TO8_LITTLE(c + 52,x13);
204    U32TO8_LITTLE(c + 56,x14);
205    U32TO8_LITTLE(c + 60,x15);
206
207    if (bytes <= 64) {
208      if (bytes < 64) {
209        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
210      }
211      x->chacha_input[12] = j12;
212      x->chacha_input[13] = j13;
213      return;
214    }
215    bytes -= 64;
216    c += 64;
217#ifndef KEYSTREAM_ONLY
218    m += 64;
219#endif
220  }
221}
222