1 | /* |
2 | Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, |
3 | Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby |
4 | denoted as "the implementer". |
5 | |
6 | For more information, feedback or questions, please refer to our websites: |
7 | http://keccak.noekeon.org/ |
8 | http://keyak.noekeon.org/ |
9 | http://ketje.noekeon.org/ |
10 | |
11 | To the extent possible under law, the implementer has waived all copyright |
12 | and related or neighboring rights to the source code in this file. |
13 | http://creativecommons.org/publicdomain/zero/1.0/ |
14 | */ |
15 | |
16 | #include <string.h> |
17 | #include <stdlib.h> |
18 | /* #include "brg_endian.h" */ |
19 | #include "KeccakP-1600-opt64-config.h" |
20 | |
/* Byte type for the byte-oriented code paths; declared here only when
 * NOT_PYTHON evaluates to non-zero. */
#if NOT_PYTHON
typedef unsigned char UINT8;
/* The 64-bit lane type UINT64 is deliberately NOT declared in this file (the
 * typedef below stays disabled), so it has to be supplied from elsewhere. */
/* typedef unsigned long long int UINT64; */
#endif

/* Translate the lane-complementing configuration switch into the
 * UseBebigokimisa switch (presumably consumed by the .macros files included
 * further down -- confirm against those files). */
#ifdef KeccakP1600_useLaneComplementing
#define UseBebigokimisa
#endif
29 | |
/*
 * ROL64(a, offset): rotate the 64-bit value `a` left by `offset` bits.
 *
 * Three variants are selected at compile time:
 *   - MSVC:                     the _rotl64 intrinsic;
 *   - KeccakP1600_useSHLD:      an inline-assembly "shld" (GCC statement
 *                               expression; the offset must be an immediate);
 *   - otherwise:                two shifts combined with XOR.
 *
 * Every macro argument is parenthesized so that expression arguments such as
 * ROL64(x ^ y, n + 1) expand with the intended precedence.
 *
 * Notes for the generic variant: `a` is evaluated twice, and `offset` must be
 * in 1..63 (an offset of 0 would shift right by 64, which is undefined).
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64((a), (offset))
#elif defined(KeccakP1600_useSHLD)
#define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
42 | |
43 | #include "KeccakP-1600-64.macros" |
/* Forward the unrolling configuration under the generic names FullUnrolling /
 * Unrolling; this must stay ahead of the unrolling macros include that
 * follows (which presumably reads them -- confirm against that file). */
#if defined(KeccakP1600_fullUnrolling)
#define FullUnrolling
#else
#define Unrolling KeccakP1600_unrolling
#endif
49 | #include "KeccakP-1600-unrolling.macros" |
50 | #include "SnP-Relaned.h" |
51 | |
52 | static const UINT64 KeccakF1600RoundConstants[24] = { |
53 | 0x0000000000000001ULL, |
54 | 0x0000000000008082ULL, |
55 | 0x800000000000808aULL, |
56 | 0x8000000080008000ULL, |
57 | 0x000000000000808bULL, |
58 | 0x0000000080000001ULL, |
59 | 0x8000000080008081ULL, |
60 | 0x8000000000008009ULL, |
61 | 0x000000000000008aULL, |
62 | 0x0000000000000088ULL, |
63 | 0x0000000080008009ULL, |
64 | 0x000000008000000aULL, |
65 | 0x000000008000808bULL, |
66 | 0x800000000000008bULL, |
67 | 0x8000000000008089ULL, |
68 | 0x8000000000008003ULL, |
69 | 0x8000000000008002ULL, |
70 | 0x8000000000000080ULL, |
71 | 0x000000000000800aULL, |
72 | 0x800000008000000aULL, |
73 | 0x8000000080008081ULL, |
74 | 0x8000000000008080ULL, |
75 | 0x0000000080000001ULL, |
76 | 0x8000000080008008ULL }; |
77 | |
78 | /* ---------------------------------------------------------------- */ |
79 | |
/*
 * Reset the 200-byte state to the all-zero Keccak state.
 *
 * state: buffer of at least 200 bytes.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * kept inverted in memory, so the logical zero state has those lanes set to
 * all ones.
 */
void KeccakP1600_Initialize(void *state)
{
#ifdef KeccakP1600_useLaneComplementing
    static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT64 *lanes = (UINT64*)state;
    unsigned int k;
#endif

    memset(state, 0, 200);
#ifdef KeccakP1600_useLaneComplementing
    for (k = 0; k < 6; k++)
        lanes[complementedLanes[k]] = ~(UINT64)0;
#endif
}
92 | |
93 | /* ---------------------------------------------------------------- */ |
94 | |
95 | void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) |
96 | { |
97 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) |
98 | UINT64 lane; |
99 | if (length == 0) |
100 | return; |
101 | if (length == 1) |
102 | lane = data[0]; |
103 | else { |
104 | lane = 0; |
105 | memcpy(&lane, data, length); |
106 | } |
107 | lane <<= offset*8; |
108 | #else |
109 | UINT64 lane = 0; |
110 | unsigned int i; |
111 | for(i=0; i<length; i++) |
112 | lane |= ((UINT64)data[i]) << ((i+offset)*8); |
113 | #endif |
114 | ((UINT64*)state)[lanePosition] ^= lane; |
115 | } |
116 | |
117 | /* ---------------------------------------------------------------- */ |
118 | |
119 | void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) |
120 | { |
121 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) |
122 | unsigned int i = 0; |
123 | #ifdef NO_MISALIGNED_ACCESSES |
124 | /* If either pointer is misaligned, fall back to byte-wise xor. */ |
125 | |
126 | if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) { |
127 | for (i = 0; i < laneCount * 8; i++) { |
128 | ((unsigned char*)state)[i] ^= data[i]; |
129 | } |
130 | } |
131 | else |
132 | #endif |
133 | { |
134 | /* Otherwise... */ |
135 | |
136 | for( ; (i+8)<=laneCount; i+=8) { |
137 | ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; |
138 | ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; |
139 | ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; |
140 | ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; |
141 | ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4]; |
142 | ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5]; |
143 | ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6]; |
144 | ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7]; |
145 | } |
146 | for( ; (i+4)<=laneCount; i+=4) { |
147 | ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; |
148 | ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; |
149 | ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; |
150 | ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; |
151 | } |
152 | for( ; (i+2)<=laneCount; i+=2) { |
153 | ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; |
154 | ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; |
155 | } |
156 | if (i<laneCount) { |
157 | ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; |
158 | } |
159 | } |
160 | #else |
161 | unsigned int i; |
162 | UINT8 *curData = data; |
163 | for(i=0; i<laneCount; i++, curData+=8) { |
164 | UINT64 lane = (UINT64)curData[0] |
165 | | ((UINT64)curData[1] << 8) |
166 | | ((UINT64)curData[2] << 16) |
167 | | ((UINT64)curData[3] << 24) |
168 | | ((UINT64)curData[4] <<32) |
169 | | ((UINT64)curData[5] << 40) |
170 | | ((UINT64)curData[6] << 48) |
171 | | ((UINT64)curData[7] << 56); |
172 | ((UINT64*)state)[i] ^= lane; |
173 | } |
174 | #endif |
175 | } |
176 | |
177 | /* ---------------------------------------------------------------- */ |
178 | |
/*
 * XOR a single byte into the state at byte position `offset`.
 * Only compiled when the platform is not little-endian.
 *
 * state:  state viewed as an array of 64-bit lanes.
 * byte:   value to XOR in.
 * offset: byte position within the state; offset/8 selects the lane and
 *         offset%8 the byte inside it (byte 0 being the least significant).
 */
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
{
    UINT64 *lanes = (UINT64*)state;
    unsigned int laneIndex = offset/8;
    unsigned int shift = (offset%8)*8;

    lanes[laneIndex] ^= ((UINT64)byte) << shift;
}
#endif
187 | |
188 | /* ---------------------------------------------------------------- */ |
189 | |
190 | void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) |
191 | { |
192 | SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8); |
193 | } |
194 | |
195 | /* ---------------------------------------------------------------- */ |
196 | |
/*
 * Replace `length` bytes of one lane, starting at byte `offset` inside that
 * lane, with the bytes from `data`.
 *
 * state:        state buffer.
 * lanePosition: index of the lane to modify.
 * data:         replacement bytes.
 * offset:       first byte position within the lane.
 * length:       number of bytes to overwrite.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * stored inverted, so the bytes written to them are inverted too.
 * Only the little-endian layout is implemented.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    unsigned char *dst = (unsigned char*)state + lanePosition*8 + offset;
#ifdef KeccakP1600_useLaneComplementing
    switch (lanePosition) {
    case 1: case 2: case 8: case 12: case 17: case 20: {
        unsigned int k;
        for (k = 0; k < length; k++)
            dst[k] = ~data[k];
        return;
    }
    default:
        break;
    }
#endif
    memcpy(dst, data, length);
#else
#error "Not yet implemented"
#endif
}
215 | |
216 | /* ---------------------------------------------------------------- */ |
217 | |
/*
 * Replace the first `laneCount` lanes of the state with the lanes in `data`.
 *
 * state:     state buffer.
 * data:      laneCount*8 replacement bytes.
 * laneCount: number of 8-byte lanes to overwrite.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * stored inverted; without it this is a plain copy.
 * Only the little-endian layout is implemented.
 */
void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    UINT64 *dst = (UINT64*)state;
    const UINT64 *src = (const UINT64*)data;
    unsigned int k;

    for (k = 0; k < laneCount; k++) {
        switch (k) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            dst[k] = ~src[k];
            break;
        default:
            dst[k] = src[k];
            break;
        }
    }
#else
    memcpy(state, data, laneCount*8);
#endif
#else
#error "Not yet implemented"
#endif
}
236 | |
237 | /* ---------------------------------------------------------------- */ |
238 | |
239 | void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) |
240 | { |
241 | SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8); |
242 | } |
243 | |
244 | /* ---------------------------------------------------------------- */ |
245 | |
/*
 * Set the first `byteCount` bytes of the state to (logical) zero.
 *
 * state:     state buffer.
 * byteCount: number of leading bytes to clear.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * stored inverted, so a logical zero is written there as all-ones bits; this
 * also applies to a trailing partial lane. Without it this is a plain memset.
 * Only the little-endian layout is implemented.
 */
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    UINT64 *lanes = (UINT64*)state;
    unsigned int fullLanes = byteCount/8;
    unsigned int tailBytes = byteCount%8;
    unsigned int k;

    /* Whole lanes first. */
    for (k = 0; k < fullLanes; k++) {
        switch (k) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            lanes[k] = ~(UINT64)0;
            break;
        default:
            lanes[k] = 0;
            break;
        }
    }
    /* Then the leading bytes of the next lane, if byteCount is not a
     * multiple of 8. */
    if (tailBytes != 0) {
        int fill;
        switch (fullLanes) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            fill = 0xFF;
            break;
        default:
            fill = 0;
            break;
        }
        memset((unsigned char*)state + fullLanes*8, fill, tailBytes);
    }
#else
    memset(state, 0, byteCount);
#endif
#else
#error "Not yet implemented"
#endif
}
271 | |
272 | /* ---------------------------------------------------------------- */ |
273 | |
274 | void KeccakP1600_Permute_24rounds(void *state) |
275 | { |
276 | declareABCDE |
277 | #ifndef KeccakP1600_fullUnrolling |
278 | unsigned int i; |
279 | #endif |
280 | UINT64 *stateAsLanes = (UINT64*)state; |
281 | |
282 | copyFromState(A, stateAsLanes) |
283 | rounds24 |
284 | copyToState(stateAsLanes, A) |
285 | } |
286 | |
287 | /* ---------------------------------------------------------------- */ |
288 | |
289 | void KeccakP1600_Permute_12rounds(void *state) |
290 | { |
291 | declareABCDE |
292 | #ifndef KeccakP1600_fullUnrolling |
293 | unsigned int i; |
294 | #endif |
295 | UINT64 *stateAsLanes = (UINT64*)state; |
296 | |
297 | copyFromState(A, stateAsLanes) |
298 | rounds12 |
299 | copyToState(stateAsLanes, A) |
300 | } |
301 | |
302 | /* ---------------------------------------------------------------- */ |
303 | |
304 | void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length) |
305 | { |
306 | UINT64 lane = ((UINT64*)state)[lanePosition]; |
307 | #ifdef KeccakP1600_useLaneComplementing |
308 | if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) |
309 | lane = ~lane; |
310 | #endif |
311 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) |
312 | { |
313 | UINT64 lane1[1]; |
314 | lane1[0] = lane; |
315 | memcpy(data, (UINT8*)lane1+offset, length); |
316 | } |
317 | #else |
318 | unsigned int i; |
319 | lane >>= offset*8; |
320 | for(i=0; i<length; i++) { |
321 | data[i] = lane & 0xFF; |
322 | lane >>= 8; |
323 | } |
324 | #endif |
325 | } |
326 | |
327 | /* ---------------------------------------------------------------- */ |
328 | |
/*
 * Serialize a 64-bit word into 8 bytes, least significant byte first.
 * Only compiled when the platform is not little-endian.
 *
 * bytes: output buffer of at least 8 bytes.
 * word:  value to serialize.
 */
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    UINT64 rest = word;
    unsigned int k;

    for (k = 0; k < (64/8); k++, rest >>= 8)
        bytes[k] = rest & 0xFF;
}
#endif
338 | |
/*
 * Copy the first `laneCount` lanes of the state into `data` as bytes, least
 * significant byte of each lane first.
 *
 * state:     state viewed as an array of 64-bit lanes (read only).
 * data:      output buffer of at least laneCount*8 bytes; no alignment is
 *            required.
 * laneCount: number of 8-byte lanes to extract.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * stored inverted, so the corresponding output bytes are inverted back after
 * the copy. This fix-up works byte by byte: `data` is a caller-supplied byte
 * buffer and must not be accessed through a UINT64 pointer, which would be a
 * misaligned access whenever the buffer is not 8-byte aligned.
 */
void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
{
#ifdef KeccakP1600_useLaneComplementing
    /* Must stay sorted in ascending order: the fix-up loop stops at the first
     * lane index that lies outside the extracted range. */
    static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    unsigned int k;
    unsigned int j;
#endif
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef KeccakP1600_useLaneComplementing
    for (k = 0; (k < 6) && (complementedLanes[k] < laneCount); k++) {
        unsigned char *laneBytes = data + complementedLanes[k]*8;

        for (j = 0; j < 8; j++)
            laneBytes[j] = (unsigned char)~laneBytes[j];
    }
#endif
}
370 | |
371 | /* ---------------------------------------------------------------- */ |
372 | |
373 | void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) |
374 | { |
375 | SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); |
376 | } |
377 | |
378 | /* ---------------------------------------------------------------- */ |
379 | |
380 | void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) |
381 | { |
382 | UINT64 lane = ((UINT64*)state)[lanePosition]; |
383 | #ifdef KeccakP1600_useLaneComplementing |
384 | if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) |
385 | lane = ~lane; |
386 | #endif |
387 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) |
388 | { |
389 | unsigned int i; |
390 | UINT64 lane1[1]; |
391 | lane1[0] = lane; |
392 | for(i=0; i<length; i++) |
393 | output[i] = input[i] ^ ((UINT8*)lane1)[offset+i]; |
394 | } |
395 | #else |
396 | unsigned int i; |
397 | lane >>= offset*8; |
398 | for(i=0; i<length; i++) { |
399 | output[i] = input[i] ^ (lane & 0xFF); |
400 | lane >>= 8; |
401 | } |
402 | #endif |
403 | } |
404 | |
405 | /* ---------------------------------------------------------------- */ |
406 | |
407 | void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) |
408 | { |
409 | unsigned int i; |
410 | #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) |
411 | unsigned char temp[8]; |
412 | unsigned int j; |
413 | #endif |
414 | |
415 | for(i=0; i<laneCount; i++) { |
416 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) |
417 | ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i]; |
418 | #else |
419 | fromWordToBytes(temp, ((const UINT64*)state)[i]); |
420 | for(j=0; j<8; j++) |
421 | output[i*8+j] = input[i*8+j] ^ temp[j]; |
422 | #endif |
423 | } |
424 | #ifdef KeccakP1600_useLaneComplementing |
425 | if (laneCount > 1) { |
426 | ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1]; |
427 | if (laneCount > 2) { |
428 | ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2]; |
429 | if (laneCount > 8) { |
430 | ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8]; |
431 | if (laneCount > 12) { |
432 | ((UINT64*)output)[12] = ~((UINT64*)output)[12]; |
433 | if (laneCount > 17) { |
434 | ((UINT64*)output)[17] = ~((UINT64*)output)[17]; |
435 | if (laneCount > 20) { |
436 | ((UINT64*)output)[20] = ~((UINT64*)output)[20]; |
437 | } |
438 | } |
439 | } |
440 | } |
441 | } |
442 | } |
443 | #endif |
444 | } |
445 | |
446 | /* ---------------------------------------------------------------- */ |
447 | |
448 | void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) |
449 | { |
450 | SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); |
451 | } |
452 | |
453 | /* ---------------------------------------------------------------- */ |
454 | |
455 | size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) |
456 | { |
457 | size_t originalDataByteLen = dataByteLen; |
458 | declareABCDE |
459 | #ifndef KeccakP1600_fullUnrolling |
460 | unsigned int i; |
461 | #endif |
462 | UINT64 *stateAsLanes = (UINT64*)state; |
463 | UINT64 *inDataAsLanes = (UINT64*)data; |
464 | |
465 | copyFromState(A, stateAsLanes) |
466 | while(dataByteLen >= laneCount*8) { |
467 | addInput(A, inDataAsLanes, laneCount) |
468 | rounds24 |
469 | inDataAsLanes += laneCount; |
470 | dataByteLen -= laneCount*8; |
471 | } |
472 | copyToState(stateAsLanes, A) |
473 | return originalDataByteLen - dataByteLen; |
474 | } |
475 | |