1/*
2Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3Joan Daemen, Michaƫl Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4denoted as "the implementer".
5
6For more information, feedback or questions, please refer to our websites:
7http://keccak.noekeon.org/
8http://keyak.noekeon.org/
9http://ketje.noekeon.org/
10
11To the extent possible under law, the implementer has waived all copyright
12and related or neighboring rights to the source code in this file.
13http://creativecommons.org/publicdomain/zero/1.0/
14*/
15
16#define declareABCDE \
17 UINT64 Aba, Abe, Abi, Abo, Abu; \
18 UINT64 Aga, Age, Agi, Ago, Agu; \
19 UINT64 Aka, Ake, Aki, Ako, Aku; \
20 UINT64 Ama, Ame, Ami, Amo, Amu; \
21 UINT64 Asa, Ase, Asi, Aso, Asu; \
22 UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
23 UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
24 UINT64 Bka, Bke, Bki, Bko, Bku; \
25 UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
26 UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
27 UINT64 Ca, Ce, Ci, Co, Cu; \
28 UINT64 Da, De, Di, Do, Du; \
29 UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
30 UINT64 Ega, Ege, Egi, Ego, Egu; \
31 UINT64 Eka, Eke, Eki, Eko, Eku; \
32 UINT64 Ema, Eme, Emi, Emo, Emu; \
33 UINT64 Esa, Ese, Esi, Eso, Esu; \
34
35#define prepareTheta \
36 Ca = Aba^Aga^Aka^Ama^Asa; \
37 Ce = Abe^Age^Ake^Ame^Ase; \
38 Ci = Abi^Agi^Aki^Ami^Asi; \
39 Co = Abo^Ago^Ako^Amo^Aso; \
40 Cu = Abu^Agu^Aku^Amu^Asu; \
41
42#ifdef UseBebigokimisa
43/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
44
45/* --- 64-bit lanes mapped to 64-bit words */
46
47#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
48 Da = Cu^ROL64(Ce, 1); \
49 De = Ca^ROL64(Ci, 1); \
50 Di = Ce^ROL64(Co, 1); \
51 Do = Ci^ROL64(Cu, 1); \
52 Du = Co^ROL64(Ca, 1); \
53\
54 A##ba ^= Da; \
55 Bba = A##ba; \
56 A##ge ^= De; \
57 Bbe = ROL64(A##ge, 44); \
58 A##ki ^= Di; \
59 Bbi = ROL64(A##ki, 43); \
60 A##mo ^= Do; \
61 Bbo = ROL64(A##mo, 21); \
62 A##su ^= Du; \
63 Bbu = ROL64(A##su, 14); \
64 E##ba = Bba ^( Bbe | Bbi ); \
65 E##ba ^= KeccakF1600RoundConstants[i]; \
66 Ca = E##ba; \
67 E##be = Bbe ^((~Bbi)| Bbo ); \
68 Ce = E##be; \
69 E##bi = Bbi ^( Bbo & Bbu ); \
70 Ci = E##bi; \
71 E##bo = Bbo ^( Bbu | Bba ); \
72 Co = E##bo; \
73 E##bu = Bbu ^( Bba & Bbe ); \
74 Cu = E##bu; \
75\
76 A##bo ^= Do; \
77 Bga = ROL64(A##bo, 28); \
78 A##gu ^= Du; \
79 Bge = ROL64(A##gu, 20); \
80 A##ka ^= Da; \
81 Bgi = ROL64(A##ka, 3); \
82 A##me ^= De; \
83 Bgo = ROL64(A##me, 45); \
84 A##si ^= Di; \
85 Bgu = ROL64(A##si, 61); \
86 E##ga = Bga ^( Bge | Bgi ); \
87 Ca ^= E##ga; \
88 E##ge = Bge ^( Bgi & Bgo ); \
89 Ce ^= E##ge; \
90 E##gi = Bgi ^( Bgo |(~Bgu)); \
91 Ci ^= E##gi; \
92 E##go = Bgo ^( Bgu | Bga ); \
93 Co ^= E##go; \
94 E##gu = Bgu ^( Bga & Bge ); \
95 Cu ^= E##gu; \
96\
97 A##be ^= De; \
98 Bka = ROL64(A##be, 1); \
99 A##gi ^= Di; \
100 Bke = ROL64(A##gi, 6); \
101 A##ko ^= Do; \
102 Bki = ROL64(A##ko, 25); \
103 A##mu ^= Du; \
104 Bko = ROL64(A##mu, 8); \
105 A##sa ^= Da; \
106 Bku = ROL64(A##sa, 18); \
107 E##ka = Bka ^( Bke | Bki ); \
108 Ca ^= E##ka; \
109 E##ke = Bke ^( Bki & Bko ); \
110 Ce ^= E##ke; \
111 E##ki = Bki ^((~Bko)& Bku ); \
112 Ci ^= E##ki; \
113 E##ko = (~Bko)^( Bku | Bka ); \
114 Co ^= E##ko; \
115 E##ku = Bku ^( Bka & Bke ); \
116 Cu ^= E##ku; \
117\
118 A##bu ^= Du; \
119 Bma = ROL64(A##bu, 27); \
120 A##ga ^= Da; \
121 Bme = ROL64(A##ga, 36); \
122 A##ke ^= De; \
123 Bmi = ROL64(A##ke, 10); \
124 A##mi ^= Di; \
125 Bmo = ROL64(A##mi, 15); \
126 A##so ^= Do; \
127 Bmu = ROL64(A##so, 56); \
128 E##ma = Bma ^( Bme & Bmi ); \
129 Ca ^= E##ma; \
130 E##me = Bme ^( Bmi | Bmo ); \
131 Ce ^= E##me; \
132 E##mi = Bmi ^((~Bmo)| Bmu ); \
133 Ci ^= E##mi; \
134 E##mo = (~Bmo)^( Bmu & Bma ); \
135 Co ^= E##mo; \
136 E##mu = Bmu ^( Bma | Bme ); \
137 Cu ^= E##mu; \
138\
139 A##bi ^= Di; \
140 Bsa = ROL64(A##bi, 62); \
141 A##go ^= Do; \
142 Bse = ROL64(A##go, 55); \
143 A##ku ^= Du; \
144 Bsi = ROL64(A##ku, 39); \
145 A##ma ^= Da; \
146 Bso = ROL64(A##ma, 41); \
147 A##se ^= De; \
148 Bsu = ROL64(A##se, 2); \
149 E##sa = Bsa ^((~Bse)& Bsi ); \
150 Ca ^= E##sa; \
151 E##se = (~Bse)^( Bsi | Bso ); \
152 Ce ^= E##se; \
153 E##si = Bsi ^( Bso & Bsu ); \
154 Ci ^= E##si; \
155 E##so = Bso ^( Bsu | Bsa ); \
156 Co ^= E##so; \
157 E##su = Bsu ^( Bsa & Bse ); \
158 Cu ^= E##su; \
159\
160
161/* --- Code for round (lane complementing pattern 'bebigokimisa') */
162
163/* --- 64-bit lanes mapped to 64-bit words */
164
165#define thetaRhoPiChiIota(i, A, E) \
166 Da = Cu^ROL64(Ce, 1); \
167 De = Ca^ROL64(Ci, 1); \
168 Di = Ce^ROL64(Co, 1); \
169 Do = Ci^ROL64(Cu, 1); \
170 Du = Co^ROL64(Ca, 1); \
171\
172 A##ba ^= Da; \
173 Bba = A##ba; \
174 A##ge ^= De; \
175 Bbe = ROL64(A##ge, 44); \
176 A##ki ^= Di; \
177 Bbi = ROL64(A##ki, 43); \
178 A##mo ^= Do; \
179 Bbo = ROL64(A##mo, 21); \
180 A##su ^= Du; \
181 Bbu = ROL64(A##su, 14); \
182 E##ba = Bba ^( Bbe | Bbi ); \
183 E##ba ^= KeccakF1600RoundConstants[i]; \
184 E##be = Bbe ^((~Bbi)| Bbo ); \
185 E##bi = Bbi ^( Bbo & Bbu ); \
186 E##bo = Bbo ^( Bbu | Bba ); \
187 E##bu = Bbu ^( Bba & Bbe ); \
188\
189 A##bo ^= Do; \
190 Bga = ROL64(A##bo, 28); \
191 A##gu ^= Du; \
192 Bge = ROL64(A##gu, 20); \
193 A##ka ^= Da; \
194 Bgi = ROL64(A##ka, 3); \
195 A##me ^= De; \
196 Bgo = ROL64(A##me, 45); \
197 A##si ^= Di; \
198 Bgu = ROL64(A##si, 61); \
199 E##ga = Bga ^( Bge | Bgi ); \
200 E##ge = Bge ^( Bgi & Bgo ); \
201 E##gi = Bgi ^( Bgo |(~Bgu)); \
202 E##go = Bgo ^( Bgu | Bga ); \
203 E##gu = Bgu ^( Bga & Bge ); \
204\
205 A##be ^= De; \
206 Bka = ROL64(A##be, 1); \
207 A##gi ^= Di; \
208 Bke = ROL64(A##gi, 6); \
209 A##ko ^= Do; \
210 Bki = ROL64(A##ko, 25); \
211 A##mu ^= Du; \
212 Bko = ROL64(A##mu, 8); \
213 A##sa ^= Da; \
214 Bku = ROL64(A##sa, 18); \
215 E##ka = Bka ^( Bke | Bki ); \
216 E##ke = Bke ^( Bki & Bko ); \
217 E##ki = Bki ^((~Bko)& Bku ); \
218 E##ko = (~Bko)^( Bku | Bka ); \
219 E##ku = Bku ^( Bka & Bke ); \
220\
221 A##bu ^= Du; \
222 Bma = ROL64(A##bu, 27); \
223 A##ga ^= Da; \
224 Bme = ROL64(A##ga, 36); \
225 A##ke ^= De; \
226 Bmi = ROL64(A##ke, 10); \
227 A##mi ^= Di; \
228 Bmo = ROL64(A##mi, 15); \
229 A##so ^= Do; \
230 Bmu = ROL64(A##so, 56); \
231 E##ma = Bma ^( Bme & Bmi ); \
232 E##me = Bme ^( Bmi | Bmo ); \
233 E##mi = Bmi ^((~Bmo)| Bmu ); \
234 E##mo = (~Bmo)^( Bmu & Bma ); \
235 E##mu = Bmu ^( Bma | Bme ); \
236\
237 A##bi ^= Di; \
238 Bsa = ROL64(A##bi, 62); \
239 A##go ^= Do; \
240 Bse = ROL64(A##go, 55); \
241 A##ku ^= Du; \
242 Bsi = ROL64(A##ku, 39); \
243 A##ma ^= Da; \
244 Bso = ROL64(A##ma, 41); \
245 A##se ^= De; \
246 Bsu = ROL64(A##se, 2); \
247 E##sa = Bsa ^((~Bse)& Bsi ); \
248 E##se = (~Bse)^( Bsi | Bso ); \
249 E##si = Bsi ^( Bso & Bsu ); \
250 E##so = Bso ^( Bsu | Bsa ); \
251 E##su = Bsu ^( Bsa & Bse ); \
252\
253
254#else /* UseBebigokimisa */
255
256/* --- Code for round, with prepare-theta */
257
258/* --- 64-bit lanes mapped to 64-bit words */
259
260#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
261 Da = Cu^ROL64(Ce, 1); \
262 De = Ca^ROL64(Ci, 1); \
263 Di = Ce^ROL64(Co, 1); \
264 Do = Ci^ROL64(Cu, 1); \
265 Du = Co^ROL64(Ca, 1); \
266\
267 A##ba ^= Da; \
268 Bba = A##ba; \
269 A##ge ^= De; \
270 Bbe = ROL64(A##ge, 44); \
271 A##ki ^= Di; \
272 Bbi = ROL64(A##ki, 43); \
273 A##mo ^= Do; \
274 Bbo = ROL64(A##mo, 21); \
275 A##su ^= Du; \
276 Bbu = ROL64(A##su, 14); \
277 E##ba = Bba ^((~Bbe)& Bbi ); \
278 E##ba ^= KeccakF1600RoundConstants[i]; \
279 Ca = E##ba; \
280 E##be = Bbe ^((~Bbi)& Bbo ); \
281 Ce = E##be; \
282 E##bi = Bbi ^((~Bbo)& Bbu ); \
283 Ci = E##bi; \
284 E##bo = Bbo ^((~Bbu)& Bba ); \
285 Co = E##bo; \
286 E##bu = Bbu ^((~Bba)& Bbe ); \
287 Cu = E##bu; \
288\
289 A##bo ^= Do; \
290 Bga = ROL64(A##bo, 28); \
291 A##gu ^= Du; \
292 Bge = ROL64(A##gu, 20); \
293 A##ka ^= Da; \
294 Bgi = ROL64(A##ka, 3); \
295 A##me ^= De; \
296 Bgo = ROL64(A##me, 45); \
297 A##si ^= Di; \
298 Bgu = ROL64(A##si, 61); \
299 E##ga = Bga ^((~Bge)& Bgi ); \
300 Ca ^= E##ga; \
301 E##ge = Bge ^((~Bgi)& Bgo ); \
302 Ce ^= E##ge; \
303 E##gi = Bgi ^((~Bgo)& Bgu ); \
304 Ci ^= E##gi; \
305 E##go = Bgo ^((~Bgu)& Bga ); \
306 Co ^= E##go; \
307 E##gu = Bgu ^((~Bga)& Bge ); \
308 Cu ^= E##gu; \
309\
310 A##be ^= De; \
311 Bka = ROL64(A##be, 1); \
312 A##gi ^= Di; \
313 Bke = ROL64(A##gi, 6); \
314 A##ko ^= Do; \
315 Bki = ROL64(A##ko, 25); \
316 A##mu ^= Du; \
317 Bko = ROL64(A##mu, 8); \
318 A##sa ^= Da; \
319 Bku = ROL64(A##sa, 18); \
320 E##ka = Bka ^((~Bke)& Bki ); \
321 Ca ^= E##ka; \
322 E##ke = Bke ^((~Bki)& Bko ); \
323 Ce ^= E##ke; \
324 E##ki = Bki ^((~Bko)& Bku ); \
325 Ci ^= E##ki; \
326 E##ko = Bko ^((~Bku)& Bka ); \
327 Co ^= E##ko; \
328 E##ku = Bku ^((~Bka)& Bke ); \
329 Cu ^= E##ku; \
330\
331 A##bu ^= Du; \
332 Bma = ROL64(A##bu, 27); \
333 A##ga ^= Da; \
334 Bme = ROL64(A##ga, 36); \
335 A##ke ^= De; \
336 Bmi = ROL64(A##ke, 10); \
337 A##mi ^= Di; \
338 Bmo = ROL64(A##mi, 15); \
339 A##so ^= Do; \
340 Bmu = ROL64(A##so, 56); \
341 E##ma = Bma ^((~Bme)& Bmi ); \
342 Ca ^= E##ma; \
343 E##me = Bme ^((~Bmi)& Bmo ); \
344 Ce ^= E##me; \
345 E##mi = Bmi ^((~Bmo)& Bmu ); \
346 Ci ^= E##mi; \
347 E##mo = Bmo ^((~Bmu)& Bma ); \
348 Co ^= E##mo; \
349 E##mu = Bmu ^((~Bma)& Bme ); \
350 Cu ^= E##mu; \
351\
352 A##bi ^= Di; \
353 Bsa = ROL64(A##bi, 62); \
354 A##go ^= Do; \
355 Bse = ROL64(A##go, 55); \
356 A##ku ^= Du; \
357 Bsi = ROL64(A##ku, 39); \
358 A##ma ^= Da; \
359 Bso = ROL64(A##ma, 41); \
360 A##se ^= De; \
361 Bsu = ROL64(A##se, 2); \
362 E##sa = Bsa ^((~Bse)& Bsi ); \
363 Ca ^= E##sa; \
364 E##se = Bse ^((~Bsi)& Bso ); \
365 Ce ^= E##se; \
366 E##si = Bsi ^((~Bso)& Bsu ); \
367 Ci ^= E##si; \
368 E##so = Bso ^((~Bsu)& Bsa ); \
369 Co ^= E##so; \
370 E##su = Bsu ^((~Bsa)& Bse ); \
371 Cu ^= E##su; \
372\
373
374/* --- Code for round */
375
376/* --- 64-bit lanes mapped to 64-bit words */
377
378#define thetaRhoPiChiIota(i, A, E) \
379 Da = Cu^ROL64(Ce, 1); \
380 De = Ca^ROL64(Ci, 1); \
381 Di = Ce^ROL64(Co, 1); \
382 Do = Ci^ROL64(Cu, 1); \
383 Du = Co^ROL64(Ca, 1); \
384\
385 A##ba ^= Da; \
386 Bba = A##ba; \
387 A##ge ^= De; \
388 Bbe = ROL64(A##ge, 44); \
389 A##ki ^= Di; \
390 Bbi = ROL64(A##ki, 43); \
391 A##mo ^= Do; \
392 Bbo = ROL64(A##mo, 21); \
393 A##su ^= Du; \
394 Bbu = ROL64(A##su, 14); \
395 E##ba = Bba ^((~Bbe)& Bbi ); \
396 E##ba ^= KeccakF1600RoundConstants[i]; \
397 E##be = Bbe ^((~Bbi)& Bbo ); \
398 E##bi = Bbi ^((~Bbo)& Bbu ); \
399 E##bo = Bbo ^((~Bbu)& Bba ); \
400 E##bu = Bbu ^((~Bba)& Bbe ); \
401\
402 A##bo ^= Do; \
403 Bga = ROL64(A##bo, 28); \
404 A##gu ^= Du; \
405 Bge = ROL64(A##gu, 20); \
406 A##ka ^= Da; \
407 Bgi = ROL64(A##ka, 3); \
408 A##me ^= De; \
409 Bgo = ROL64(A##me, 45); \
410 A##si ^= Di; \
411 Bgu = ROL64(A##si, 61); \
412 E##ga = Bga ^((~Bge)& Bgi ); \
413 E##ge = Bge ^((~Bgi)& Bgo ); \
414 E##gi = Bgi ^((~Bgo)& Bgu ); \
415 E##go = Bgo ^((~Bgu)& Bga ); \
416 E##gu = Bgu ^((~Bga)& Bge ); \
417\
418 A##be ^= De; \
419 Bka = ROL64(A##be, 1); \
420 A##gi ^= Di; \
421 Bke = ROL64(A##gi, 6); \
422 A##ko ^= Do; \
423 Bki = ROL64(A##ko, 25); \
424 A##mu ^= Du; \
425 Bko = ROL64(A##mu, 8); \
426 A##sa ^= Da; \
427 Bku = ROL64(A##sa, 18); \
428 E##ka = Bka ^((~Bke)& Bki ); \
429 E##ke = Bke ^((~Bki)& Bko ); \
430 E##ki = Bki ^((~Bko)& Bku ); \
431 E##ko = Bko ^((~Bku)& Bka ); \
432 E##ku = Bku ^((~Bka)& Bke ); \
433\
434 A##bu ^= Du; \
435 Bma = ROL64(A##bu, 27); \
436 A##ga ^= Da; \
437 Bme = ROL64(A##ga, 36); \
438 A##ke ^= De; \
439 Bmi = ROL64(A##ke, 10); \
440 A##mi ^= Di; \
441 Bmo = ROL64(A##mi, 15); \
442 A##so ^= Do; \
443 Bmu = ROL64(A##so, 56); \
444 E##ma = Bma ^((~Bme)& Bmi ); \
445 E##me = Bme ^((~Bmi)& Bmo ); \
446 E##mi = Bmi ^((~Bmo)& Bmu ); \
447 E##mo = Bmo ^((~Bmu)& Bma ); \
448 E##mu = Bmu ^((~Bma)& Bme ); \
449\
450 A##bi ^= Di; \
451 Bsa = ROL64(A##bi, 62); \
452 A##go ^= Do; \
453 Bse = ROL64(A##go, 55); \
454 A##ku ^= Du; \
455 Bsi = ROL64(A##ku, 39); \
456 A##ma ^= Da; \
457 Bso = ROL64(A##ma, 41); \
458 A##se ^= De; \
459 Bsu = ROL64(A##se, 2); \
460 E##sa = Bsa ^((~Bse)& Bsi ); \
461 E##se = Bse ^((~Bsi)& Bso ); \
462 E##si = Bsi ^((~Bso)& Bsu ); \
463 E##so = Bso ^((~Bsu)& Bsa ); \
464 E##su = Bsu ^((~Bsa)& Bse ); \
465\
466
467#endif /* UseBebigokimisa */
468
469
470#define copyFromState(X, state) \
471 X##ba = state[ 0]; \
472 X##be = state[ 1]; \
473 X##bi = state[ 2]; \
474 X##bo = state[ 3]; \
475 X##bu = state[ 4]; \
476 X##ga = state[ 5]; \
477 X##ge = state[ 6]; \
478 X##gi = state[ 7]; \
479 X##go = state[ 8]; \
480 X##gu = state[ 9]; \
481 X##ka = state[10]; \
482 X##ke = state[11]; \
483 X##ki = state[12]; \
484 X##ko = state[13]; \
485 X##ku = state[14]; \
486 X##ma = state[15]; \
487 X##me = state[16]; \
488 X##mi = state[17]; \
489 X##mo = state[18]; \
490 X##mu = state[19]; \
491 X##sa = state[20]; \
492 X##se = state[21]; \
493 X##si = state[22]; \
494 X##so = state[23]; \
495 X##su = state[24]; \
496
497#define copyToState(state, X) \
498 state[ 0] = X##ba; \
499 state[ 1] = X##be; \
500 state[ 2] = X##bi; \
501 state[ 3] = X##bo; \
502 state[ 4] = X##bu; \
503 state[ 5] = X##ga; \
504 state[ 6] = X##ge; \
505 state[ 7] = X##gi; \
506 state[ 8] = X##go; \
507 state[ 9] = X##gu; \
508 state[10] = X##ka; \
509 state[11] = X##ke; \
510 state[12] = X##ki; \
511 state[13] = X##ko; \
512 state[14] = X##ku; \
513 state[15] = X##ma; \
514 state[16] = X##me; \
515 state[17] = X##mi; \
516 state[18] = X##mo; \
517 state[19] = X##mu; \
518 state[20] = X##sa; \
519 state[21] = X##se; \
520 state[22] = X##si; \
521 state[23] = X##so; \
522 state[24] = X##su; \
523
524#define copyStateVariables(X, Y) \
525 X##ba = Y##ba; \
526 X##be = Y##be; \
527 X##bi = Y##bi; \
528 X##bo = Y##bo; \
529 X##bu = Y##bu; \
530 X##ga = Y##ga; \
531 X##ge = Y##ge; \
532 X##gi = Y##gi; \
533 X##go = Y##go; \
534 X##gu = Y##gu; \
535 X##ka = Y##ka; \
536 X##ke = Y##ke; \
537 X##ki = Y##ki; \
538 X##ko = Y##ko; \
539 X##ku = Y##ku; \
540 X##ma = Y##ma; \
541 X##me = Y##me; \
542 X##mi = Y##mi; \
543 X##mo = Y##mo; \
544 X##mu = Y##mu; \
545 X##sa = Y##sa; \
546 X##se = Y##se; \
547 X##si = Y##si; \
548 X##so = Y##so; \
549 X##su = Y##su; \
550
551#define copyFromStateAndAdd(X, state, input, laneCount) \
552 if (laneCount < 16) { \
553 if (laneCount < 8) { \
554 if (laneCount < 4) { \
555 if (laneCount < 2) { \
556 if (laneCount < 1) { \
557 X##ba = state[ 0]; \
558 } \
559 else { \
560 X##ba = state[ 0]^input[ 0]; \
561 } \
562 X##be = state[ 1]; \
563 X##bi = state[ 2]; \
564 } \
565 else { \
566 X##ba = state[ 0]^input[ 0]; \
567 X##be = state[ 1]^input[ 1]; \
568 if (laneCount < 3) { \
569 X##bi = state[ 2]; \
570 } \
571 else { \
572 X##bi = state[ 2]^input[ 2]; \
573 } \
574 } \
575 X##bo = state[ 3]; \
576 X##bu = state[ 4]; \
577 X##ga = state[ 5]; \
578 X##ge = state[ 6]; \
579 } \
580 else { \
581 X##ba = state[ 0]^input[ 0]; \
582 X##be = state[ 1]^input[ 1]; \
583 X##bi = state[ 2]^input[ 2]; \
584 X##bo = state[ 3]^input[ 3]; \
585 if (laneCount < 6) { \
586 if (laneCount < 5) { \
587 X##bu = state[ 4]; \
588 } \
589 else { \
590 X##bu = state[ 4]^input[ 4]; \
591 } \
592 X##ga = state[ 5]; \
593 X##ge = state[ 6]; \
594 } \
595 else { \
596 X##bu = state[ 4]^input[ 4]; \
597 X##ga = state[ 5]^input[ 5]; \
598 if (laneCount < 7) { \
599 X##ge = state[ 6]; \
600 } \
601 else { \
602 X##ge = state[ 6]^input[ 6]; \
603 } \
604 } \
605 } \
606 X##gi = state[ 7]; \
607 X##go = state[ 8]; \
608 X##gu = state[ 9]; \
609 X##ka = state[10]; \
610 X##ke = state[11]; \
611 X##ki = state[12]; \
612 X##ko = state[13]; \
613 X##ku = state[14]; \
614 } \
615 else { \
616 X##ba = state[ 0]^input[ 0]; \
617 X##be = state[ 1]^input[ 1]; \
618 X##bi = state[ 2]^input[ 2]; \
619 X##bo = state[ 3]^input[ 3]; \
620 X##bu = state[ 4]^input[ 4]; \
621 X##ga = state[ 5]^input[ 5]; \
622 X##ge = state[ 6]^input[ 6]; \
623 X##gi = state[ 7]^input[ 7]; \
624 if (laneCount < 12) { \
625 if (laneCount < 10) { \
626 if (laneCount < 9) { \
627 X##go = state[ 8]; \
628 } \
629 else { \
630 X##go = state[ 8]^input[ 8]; \
631 } \
632 X##gu = state[ 9]; \
633 X##ka = state[10]; \
634 } \
635 else { \
636 X##go = state[ 8]^input[ 8]; \
637 X##gu = state[ 9]^input[ 9]; \
638 if (laneCount < 11) { \
639 X##ka = state[10]; \
640 } \
641 else { \
642 X##ka = state[10]^input[10]; \
643 } \
644 } \
645 X##ke = state[11]; \
646 X##ki = state[12]; \
647 X##ko = state[13]; \
648 X##ku = state[14]; \
649 } \
650 else { \
651 X##go = state[ 8]^input[ 8]; \
652 X##gu = state[ 9]^input[ 9]; \
653 X##ka = state[10]^input[10]; \
654 X##ke = state[11]^input[11]; \
655 if (laneCount < 14) { \
656 if (laneCount < 13) { \
657 X##ki = state[12]; \
658 } \
659 else { \
660 X##ki = state[12]^input[12]; \
661 } \
662 X##ko = state[13]; \
663 X##ku = state[14]; \
664 } \
665 else { \
666 X##ki = state[12]^input[12]; \
667 X##ko = state[13]^input[13]; \
668 if (laneCount < 15) { \
669 X##ku = state[14]; \
670 } \
671 else { \
672 X##ku = state[14]^input[14]; \
673 } \
674 } \
675 } \
676 } \
677 X##ma = state[15]; \
678 X##me = state[16]; \
679 X##mi = state[17]; \
680 X##mo = state[18]; \
681 X##mu = state[19]; \
682 X##sa = state[20]; \
683 X##se = state[21]; \
684 X##si = state[22]; \
685 X##so = state[23]; \
686 X##su = state[24]; \
687 } \
688 else { \
689 X##ba = state[ 0]^input[ 0]; \
690 X##be = state[ 1]^input[ 1]; \
691 X##bi = state[ 2]^input[ 2]; \
692 X##bo = state[ 3]^input[ 3]; \
693 X##bu = state[ 4]^input[ 4]; \
694 X##ga = state[ 5]^input[ 5]; \
695 X##ge = state[ 6]^input[ 6]; \
696 X##gi = state[ 7]^input[ 7]; \
697 X##go = state[ 8]^input[ 8]; \
698 X##gu = state[ 9]^input[ 9]; \
699 X##ka = state[10]^input[10]; \
700 X##ke = state[11]^input[11]; \
701 X##ki = state[12]^input[12]; \
702 X##ko = state[13]^input[13]; \
703 X##ku = state[14]^input[14]; \
704 X##ma = state[15]^input[15]; \
705 if (laneCount < 24) { \
706 if (laneCount < 20) { \
707 if (laneCount < 18) { \
708 if (laneCount < 17) { \
709 X##me = state[16]; \
710 } \
711 else { \
712 X##me = state[16]^input[16]; \
713 } \
714 X##mi = state[17]; \
715 X##mo = state[18]; \
716 } \
717 else { \
718 X##me = state[16]^input[16]; \
719 X##mi = state[17]^input[17]; \
720 if (laneCount < 19) { \
721 X##mo = state[18]; \
722 } \
723 else { \
724 X##mo = state[18]^input[18]; \
725 } \
726 } \
727 X##mu = state[19]; \
728 X##sa = state[20]; \
729 X##se = state[21]; \
730 X##si = state[22]; \
731 } \
732 else { \
733 X##me = state[16]^input[16]; \
734 X##mi = state[17]^input[17]; \
735 X##mo = state[18]^input[18]; \
736 X##mu = state[19]^input[19]; \
737 if (laneCount < 22) { \
738 if (laneCount < 21) { \
739 X##sa = state[20]; \
740 } \
741 else { \
742 X##sa = state[20]^input[20]; \
743 } \
744 X##se = state[21]; \
745 X##si = state[22]; \
746 } \
747 else { \
748 X##sa = state[20]^input[20]; \
749 X##se = state[21]^input[21]; \
750 if (laneCount < 23) { \
751 X##si = state[22]; \
752 } \
753 else { \
754 X##si = state[22]^input[22]; \
755 } \
756 } \
757 } \
758 X##so = state[23]; \
759 X##su = state[24]; \
760 } \
761 else { \
762 X##me = state[16]^input[16]; \
763 X##mi = state[17]^input[17]; \
764 X##mo = state[18]^input[18]; \
765 X##mu = state[19]^input[19]; \
766 X##sa = state[20]^input[20]; \
767 X##se = state[21]^input[21]; \
768 X##si = state[22]^input[22]; \
769 X##so = state[23]^input[23]; \
770 if (laneCount < 25) { \
771 X##su = state[24]; \
772 } \
773 else { \
774 X##su = state[24]^input[24]; \
775 } \
776 } \
777 }
778
779#define addInput(X, input, laneCount) \
780 if (laneCount == 21) { \
781 X##ba ^= input[ 0]; \
782 X##be ^= input[ 1]; \
783 X##bi ^= input[ 2]; \
784 X##bo ^= input[ 3]; \
785 X##bu ^= input[ 4]; \
786 X##ga ^= input[ 5]; \
787 X##ge ^= input[ 6]; \
788 X##gi ^= input[ 7]; \
789 X##go ^= input[ 8]; \
790 X##gu ^= input[ 9]; \
791 X##ka ^= input[10]; \
792 X##ke ^= input[11]; \
793 X##ki ^= input[12]; \
794 X##ko ^= input[13]; \
795 X##ku ^= input[14]; \
796 X##ma ^= input[15]; \
797 X##me ^= input[16]; \
798 X##mi ^= input[17]; \
799 X##mo ^= input[18]; \
800 X##mu ^= input[19]; \
801 X##sa ^= input[20]; \
802 } \
803 else if (laneCount < 16) { \
804 if (laneCount < 8) { \
805 if (laneCount < 4) { \
806 if (laneCount < 2) { \
807 if (laneCount < 1) { \
808 } \
809 else { \
810 X##ba ^= input[ 0]; \
811 } \
812 } \
813 else { \
814 X##ba ^= input[ 0]; \
815 X##be ^= input[ 1]; \
816 if (laneCount < 3) { \
817 } \
818 else { \
819 X##bi ^= input[ 2]; \
820 } \
821 } \
822 } \
823 else { \
824 X##ba ^= input[ 0]; \
825 X##be ^= input[ 1]; \
826 X##bi ^= input[ 2]; \
827 X##bo ^= input[ 3]; \
828 if (laneCount < 6) { \
829 if (laneCount < 5) { \
830 } \
831 else { \
832 X##bu ^= input[ 4]; \
833 } \
834 } \
835 else { \
836 X##bu ^= input[ 4]; \
837 X##ga ^= input[ 5]; \
838 if (laneCount < 7) { \
839 } \
840 else { \
841 X##ge ^= input[ 6]; \
842 } \
843 } \
844 } \
845 } \
846 else { \
847 X##ba ^= input[ 0]; \
848 X##be ^= input[ 1]; \
849 X##bi ^= input[ 2]; \
850 X##bo ^= input[ 3]; \
851 X##bu ^= input[ 4]; \
852 X##ga ^= input[ 5]; \
853 X##ge ^= input[ 6]; \
854 X##gi ^= input[ 7]; \
855 if (laneCount < 12) { \
856 if (laneCount < 10) { \
857 if (laneCount < 9) { \
858 } \
859 else { \
860 X##go ^= input[ 8]; \
861 } \
862 } \
863 else { \
864 X##go ^= input[ 8]; \
865 X##gu ^= input[ 9]; \
866 if (laneCount < 11) { \
867 } \
868 else { \
869 X##ka ^= input[10]; \
870 } \
871 } \
872 } \
873 else { \
874 X##go ^= input[ 8]; \
875 X##gu ^= input[ 9]; \
876 X##ka ^= input[10]; \
877 X##ke ^= input[11]; \
878 if (laneCount < 14) { \
879 if (laneCount < 13) { \
880 } \
881 else { \
882 X##ki ^= input[12]; \
883 } \
884 } \
885 else { \
886 X##ki ^= input[12]; \
887 X##ko ^= input[13]; \
888 if (laneCount < 15) { \
889 } \
890 else { \
891 X##ku ^= input[14]; \
892 } \
893 } \
894 } \
895 } \
896 } \
897 else { \
898 X##ba ^= input[ 0]; \
899 X##be ^= input[ 1]; \
900 X##bi ^= input[ 2]; \
901 X##bo ^= input[ 3]; \
902 X##bu ^= input[ 4]; \
903 X##ga ^= input[ 5]; \
904 X##ge ^= input[ 6]; \
905 X##gi ^= input[ 7]; \
906 X##go ^= input[ 8]; \
907 X##gu ^= input[ 9]; \
908 X##ka ^= input[10]; \
909 X##ke ^= input[11]; \
910 X##ki ^= input[12]; \
911 X##ko ^= input[13]; \
912 X##ku ^= input[14]; \
913 X##ma ^= input[15]; \
914 if (laneCount < 24) { \
915 if (laneCount < 20) { \
916 if (laneCount < 18) { \
917 if (laneCount < 17) { \
918 } \
919 else { \
920 X##me ^= input[16]; \
921 } \
922 } \
923 else { \
924 X##me ^= input[16]; \
925 X##mi ^= input[17]; \
926 if (laneCount < 19) { \
927 } \
928 else { \
929 X##mo ^= input[18]; \
930 } \
931 } \
932 } \
933 else { \
934 X##me ^= input[16]; \
935 X##mi ^= input[17]; \
936 X##mo ^= input[18]; \
937 X##mu ^= input[19]; \
938 if (laneCount < 22) { \
939 if (laneCount < 21) { \
940 } \
941 else { \
942 X##sa ^= input[20]; \
943 } \
944 } \
945 else { \
946 X##sa ^= input[20]; \
947 X##se ^= input[21]; \
948 if (laneCount < 23) { \
949 } \
950 else { \
951 X##si ^= input[22]; \
952 } \
953 } \
954 } \
955 } \
956 else { \
957 X##me ^= input[16]; \
958 X##mi ^= input[17]; \
959 X##mo ^= input[18]; \
960 X##mu ^= input[19]; \
961 X##sa ^= input[20]; \
962 X##se ^= input[21]; \
963 X##si ^= input[22]; \
964 X##so ^= input[23]; \
965 if (laneCount < 25) { \
966 } \
967 else { \
968 X##su ^= input[24]; \
969 } \
970 } \
971 }
972
973#ifdef UseBebigokimisa
974
975#define copyToStateAndOutput(X, state, output, laneCount) \
976 if (laneCount < 16) { \
977 if (laneCount < 8) { \
978 if (laneCount < 4) { \
979 if (laneCount < 2) { \
980 state[ 0] = X##ba; \
981 if (laneCount >= 1) { \
982 output[ 0] = X##ba; \
983 } \
984 state[ 1] = X##be; \
985 state[ 2] = X##bi; \
986 } \
987 else { \
988 state[ 0] = X##ba; \
989 output[ 0] = X##ba; \
990 state[ 1] = X##be; \
991 output[ 1] = ~X##be; \
992 state[ 2] = X##bi; \
993 if (laneCount >= 3) { \
994 output[ 2] = ~X##bi; \
995 } \
996 } \
997 state[ 3] = X##bo; \
998 state[ 4] = X##bu; \
999 state[ 5] = X##ga; \
1000 state[ 6] = X##ge; \
1001 } \
1002 else { \
1003 state[ 0] = X##ba; \
1004 output[ 0] = X##ba; \
1005 state[ 1] = X##be; \
1006 output[ 1] = ~X##be; \
1007 state[ 2] = X##bi; \
1008 output[ 2] = ~X##bi; \
1009 state[ 3] = X##bo; \
1010 output[ 3] = X##bo; \
1011 if (laneCount < 6) { \
1012 state[ 4] = X##bu; \
1013 if (laneCount >= 5) { \
1014 output[ 4] = X##bu; \
1015 } \
1016 state[ 5] = X##ga; \
1017 state[ 6] = X##ge; \
1018 } \
1019 else { \
1020 state[ 4] = X##bu; \
1021 output[ 4] = X##bu; \
1022 state[ 5] = X##ga; \
1023 output[ 5] = X##ga; \
1024 state[ 6] = X##ge; \
1025 if (laneCount >= 7) { \
1026 output[ 6] = X##ge; \
1027 } \
1028 } \
1029 } \
1030 state[ 7] = X##gi; \
1031 state[ 8] = X##go; \
1032 state[ 9] = X##gu; \
1033 state[10] = X##ka; \
1034 state[11] = X##ke; \
1035 state[12] = X##ki; \
1036 state[13] = X##ko; \
1037 state[14] = X##ku; \
1038 } \
1039 else { \
1040 state[ 0] = X##ba; \
1041 output[ 0] = X##ba; \
1042 state[ 1] = X##be; \
1043 output[ 1] = ~X##be; \
1044 state[ 2] = X##bi; \
1045 output[ 2] = ~X##bi; \
1046 state[ 3] = X##bo; \
1047 output[ 3] = X##bo; \
1048 state[ 4] = X##bu; \
1049 output[ 4] = X##bu; \
1050 state[ 5] = X##ga; \
1051 output[ 5] = X##ga; \
1052 state[ 6] = X##ge; \
1053 output[ 6] = X##ge; \
1054 state[ 7] = X##gi; \
1055 output[ 7] = X##gi; \
1056 if (laneCount < 12) { \
1057 if (laneCount < 10) { \
1058 state[ 8] = X##go; \
1059 if (laneCount >= 9) { \
1060 output[ 8] = ~X##go; \
1061 } \
1062 state[ 9] = X##gu; \
1063 state[10] = X##ka; \
1064 } \
1065 else { \
1066 state[ 8] = X##go; \
1067 output[ 8] = ~X##go; \
1068 state[ 9] = X##gu; \
1069 output[ 9] = X##gu; \
1070 state[10] = X##ka; \
1071 if (laneCount >= 11) { \
1072 output[10] = X##ka; \
1073 } \
1074 } \
1075 state[11] = X##ke; \
1076 state[12] = X##ki; \
1077 state[13] = X##ko; \
1078 state[14] = X##ku; \
1079 } \
1080 else { \
1081 state[ 8] = X##go; \
1082 output[ 8] = ~X##go; \
1083 state[ 9] = X##gu; \
1084 output[ 9] = X##gu; \
1085 state[10] = X##ka; \
1086 output[10] = X##ka; \
1087 state[11] = X##ke; \
1088 output[11] = X##ke; \
1089 if (laneCount < 14) { \
1090 state[12] = X##ki; \
1091 if (laneCount >= 13) { \
1092 output[12] = ~X##ki; \
1093 } \
1094 state[13] = X##ko; \
1095 state[14] = X##ku; \
1096 } \
1097 else { \
1098 state[12] = X##ki; \
1099 output[12] = ~X##ki; \
1100 state[13] = X##ko; \
1101 output[13] = X##ko; \
1102 state[14] = X##ku; \
1103 if (laneCount >= 15) { \
1104 output[14] = X##ku; \
1105 } \
1106 } \
1107 } \
1108 } \
1109 state[15] = X##ma; \
1110 state[16] = X##me; \
1111 state[17] = X##mi; \
1112 state[18] = X##mo; \
1113 state[19] = X##mu; \
1114 state[20] = X##sa; \
1115 state[21] = X##se; \
1116 state[22] = X##si; \
1117 state[23] = X##so; \
1118 state[24] = X##su; \
1119 } \
1120 else { \
1121 state[ 0] = X##ba; \
1122 output[ 0] = X##ba; \
1123 state[ 1] = X##be; \
1124 output[ 1] = ~X##be; \
1125 state[ 2] = X##bi; \
1126 output[ 2] = ~X##bi; \
1127 state[ 3] = X##bo; \
1128 output[ 3] = X##bo; \
1129 state[ 4] = X##bu; \
1130 output[ 4] = X##bu; \
1131 state[ 5] = X##ga; \
1132 output[ 5] = X##ga; \
1133 state[ 6] = X##ge; \
1134 output[ 6] = X##ge; \
1135 state[ 7] = X##gi; \
1136 output[ 7] = X##gi; \
1137 state[ 8] = X##go; \
1138 output[ 8] = ~X##go; \
1139 state[ 9] = X##gu; \
1140 output[ 9] = X##gu; \
1141 state[10] = X##ka; \
1142 output[10] = X##ka; \
1143 state[11] = X##ke; \
1144 output[11] = X##ke; \
1145 state[12] = X##ki; \
1146 output[12] = ~X##ki; \
1147 state[13] = X##ko; \
1148 output[13] = X##ko; \
1149 state[14] = X##ku; \
1150 output[14] = X##ku; \
1151 state[15] = X##ma; \
1152 output[15] = X##ma; \
1153 if (laneCount < 24) { \
1154 if (laneCount < 20) { \
1155 if (laneCount < 18) { \
1156 state[16] = X##me; \
1157 if (laneCount >= 17) { \
1158 output[16] = X##me; \
1159 } \
1160 state[17] = X##mi; \
1161 state[18] = X##mo; \
1162 } \
1163 else { \
1164 state[16] = X##me; \
1165 output[16] = X##me; \
1166 state[17] = X##mi; \
1167 output[17] = ~X##mi; \
1168 state[18] = X##mo; \
1169 if (laneCount >= 19) { \
1170 output[18] = X##mo; \
1171 } \
1172 } \
1173 state[19] = X##mu; \
1174 state[20] = X##sa; \
1175 state[21] = X##se; \
1176 state[22] = X##si; \
1177 } \
1178 else { \
1179 state[16] = X##me; \
1180 output[16] = X##me; \
1181 state[17] = X##mi; \
1182 output[17] = ~X##mi; \
1183 state[18] = X##mo; \
1184 output[18] = X##mo; \
1185 state[19] = X##mu; \
1186 output[19] = X##mu; \
1187 if (laneCount < 22) { \
1188 state[20] = X##sa; \
1189 if (laneCount >= 21) { \
1190 output[20] = ~X##sa; \
1191 } \
1192 state[21] = X##se; \
1193 state[22] = X##si; \
1194 } \
1195 else { \
1196 state[20] = X##sa; \
1197 output[20] = ~X##sa; \
1198 state[21] = X##se; \
1199 output[21] = X##se; \
1200 state[22] = X##si; \
1201 if (laneCount >= 23) { \
1202 output[22] = X##si; \
1203 } \
1204 } \
1205 } \
1206 state[23] = X##so; \
1207 state[24] = X##su; \
1208 } \
1209 else { \
1210 state[16] = X##me; \
1211 output[16] = X##me; \
1212 state[17] = X##mi; \
1213 output[17] = ~X##mi; \
1214 state[18] = X##mo; \
1215 output[18] = X##mo; \
1216 state[19] = X##mu; \
1217 output[19] = X##mu; \
1218 state[20] = X##sa; \
1219 output[20] = ~X##sa; \
1220 state[21] = X##se; \
1221 output[21] = X##se; \
1222 state[22] = X##si; \
1223 output[22] = X##si; \
1224 state[23] = X##so; \
1225 output[23] = X##so; \
1226 state[24] = X##su; \
1227 if (laneCount >= 25) { \
1228 output[24] = X##su; \
1229 } \
1230 } \
1231 }
1232
1233#define output(X, output, laneCount) \
1234 if (laneCount < 16) { \
1235 if (laneCount < 8) { \
1236 if (laneCount < 4) { \
1237 if (laneCount < 2) { \
1238 if (laneCount >= 1) { \
1239 output[ 0] = X##ba; \
1240 } \
1241 } \
1242 else { \
1243 output[ 0] = X##ba; \
1244 output[ 1] = ~X##be; \
1245 if (laneCount >= 3) { \
1246 output[ 2] = ~X##bi; \
1247 } \
1248 } \
1249 } \
1250 else { \
1251 output[ 0] = X##ba; \
1252 output[ 1] = ~X##be; \
1253 output[ 2] = ~X##bi; \
1254 output[ 3] = X##bo; \
1255 if (laneCount < 6) { \
1256 if (laneCount >= 5) { \
1257 output[ 4] = X##bu; \
1258 } \
1259 } \
1260 else { \
1261 output[ 4] = X##bu; \
1262 output[ 5] = X##ga; \
1263 if (laneCount >= 7) { \
1264 output[ 6] = X##ge; \
1265 } \
1266 } \
1267 } \
1268 } \
1269 else { \
1270 output[ 0] = X##ba; \
1271 output[ 1] = ~X##be; \
1272 output[ 2] = ~X##bi; \
1273 output[ 3] = X##bo; \
1274 output[ 4] = X##bu; \
1275 output[ 5] = X##ga; \
1276 output[ 6] = X##ge; \
1277 output[ 7] = X##gi; \
1278 if (laneCount < 12) { \
1279 if (laneCount < 10) { \
1280 if (laneCount >= 9) { \
1281 output[ 8] = ~X##go; \
1282 } \
1283 } \
1284 else { \
1285 output[ 8] = ~X##go; \
1286 output[ 9] = X##gu; \
1287 if (laneCount >= 11) { \
1288 output[10] = X##ka; \
1289 } \
1290 } \
1291 } \
1292 else { \
1293 output[ 8] = ~X##go; \
1294 output[ 9] = X##gu; \
1295 output[10] = X##ka; \
1296 output[11] = X##ke; \
1297 if (laneCount < 14) { \
1298 if (laneCount >= 13) { \
1299 output[12] = ~X##ki; \
1300 } \
1301 } \
1302 else { \
1303 output[12] = ~X##ki; \
1304 output[13] = X##ko; \
1305 if (laneCount >= 15) { \
1306 output[14] = X##ku; \
1307 } \
1308 } \
1309 } \
1310 } \
1311 } \
1312 else { \
1313 output[ 0] = X##ba; \
1314 output[ 1] = ~X##be; \
1315 output[ 2] = ~X##bi; \
1316 output[ 3] = X##bo; \
1317 output[ 4] = X##bu; \
1318 output[ 5] = X##ga; \
1319 output[ 6] = X##ge; \
1320 output[ 7] = X##gi; \
1321 output[ 8] = ~X##go; \
1322 output[ 9] = X##gu; \
1323 output[10] = X##ka; \
1324 output[11] = X##ke; \
1325 output[12] = ~X##ki; \
1326 output[13] = X##ko; \
1327 output[14] = X##ku; \
1328 output[15] = X##ma; \
1329 if (laneCount < 24) { \
1330 if (laneCount < 20) { \
1331 if (laneCount < 18) { \
1332 if (laneCount >= 17) { \
1333 output[16] = X##me; \
1334 } \
1335 } \
1336 else { \
1337 output[16] = X##me; \
1338 output[17] = ~X##mi; \
1339 if (laneCount >= 19) { \
1340 output[18] = X##mo; \
1341 } \
1342 } \
1343 } \
1344 else { \
1345 output[16] = X##me; \
1346 output[17] = ~X##mi; \
1347 output[18] = X##mo; \
1348 output[19] = X##mu; \
1349 if (laneCount < 22) { \
1350 if (laneCount >= 21) { \
1351 output[20] = ~X##sa; \
1352 } \
1353 } \
1354 else { \
1355 output[20] = ~X##sa; \
1356 output[21] = X##se; \
1357 if (laneCount >= 23) { \
1358 output[22] = X##si; \
1359 } \
1360 } \
1361 } \
1362 } \
1363 else { \
1364 output[16] = X##me; \
1365 output[17] = ~X##mi; \
1366 output[18] = X##mo; \
1367 output[19] = X##mu; \
1368 output[20] = ~X##sa; \
1369 output[21] = X##se; \
1370 output[22] = X##si; \
1371 output[23] = X##so; \
1372 if (laneCount >= 25) { \
1373 output[24] = X##su; \
1374 } \
1375 } \
1376 }
1377
1378#define wrapOne(X, input, output, index, name) \
1379 X##name ^= input[index]; \
1380 output[index] = X##name;
1381
1382#define wrapOneInvert(X, input, output, index, name) \
1383 X##name ^= input[index]; \
1384 output[index] = ~X##name;
1385
1386#define unwrapOne(X, input, output, index, name) \
1387 output[index] = input[index] ^ X##name; \
1388 X##name ^= output[index];
1389
1390#define unwrapOneInvert(X, input, output, index, name) \
1391 output[index] = ~(input[index] ^ X##name); \
1392 X##name ^= output[index]; \
1393
1394#else /* UseBebigokimisa */
1395
1396
1397#define copyToStateAndOutput(X, state, output, laneCount) \
1398 if (laneCount < 16) { \
1399 if (laneCount < 8) { \
1400 if (laneCount < 4) { \
1401 if (laneCount < 2) { \
1402 state[ 0] = X##ba; \
1403 if (laneCount >= 1) { \
1404 output[ 0] = X##ba; \
1405 } \
1406 state[ 1] = X##be; \
1407 state[ 2] = X##bi; \
1408 } \
1409 else { \
1410 state[ 0] = X##ba; \
1411 output[ 0] = X##ba; \
1412 state[ 1] = X##be; \
1413 output[ 1] = X##be; \
1414 state[ 2] = X##bi; \
1415 if (laneCount >= 3) { \
1416 output[ 2] = X##bi; \
1417 } \
1418 } \
1419 state[ 3] = X##bo; \
1420 state[ 4] = X##bu; \
1421 state[ 5] = X##ga; \
1422 state[ 6] = X##ge; \
1423 } \
1424 else { \
1425 state[ 0] = X##ba; \
1426 output[ 0] = X##ba; \
1427 state[ 1] = X##be; \
1428 output[ 1] = X##be; \
1429 state[ 2] = X##bi; \
1430 output[ 2] = X##bi; \
1431 state[ 3] = X##bo; \
1432 output[ 3] = X##bo; \
1433 if (laneCount < 6) { \
1434 state[ 4] = X##bu; \
1435 if (laneCount >= 5) { \
1436 output[ 4] = X##bu; \
1437 } \
1438 state[ 5] = X##ga; \
1439 state[ 6] = X##ge; \
1440 } \
1441 else { \
1442 state[ 4] = X##bu; \
1443 output[ 4] = X##bu; \
1444 state[ 5] = X##ga; \
1445 output[ 5] = X##ga; \
1446 state[ 6] = X##ge; \
1447 if (laneCount >= 7) { \
1448 output[ 6] = X##ge; \
1449 } \
1450 } \
1451 } \
1452 state[ 7] = X##gi; \
1453 state[ 8] = X##go; \
1454 state[ 9] = X##gu; \
1455 state[10] = X##ka; \
1456 state[11] = X##ke; \
1457 state[12] = X##ki; \
1458 state[13] = X##ko; \
1459 state[14] = X##ku; \
1460 } \
1461 else { \
1462 state[ 0] = X##ba; \
1463 output[ 0] = X##ba; \
1464 state[ 1] = X##be; \
1465 output[ 1] = X##be; \
1466 state[ 2] = X##bi; \
1467 output[ 2] = X##bi; \
1468 state[ 3] = X##bo; \
1469 output[ 3] = X##bo; \
1470 state[ 4] = X##bu; \
1471 output[ 4] = X##bu; \
1472 state[ 5] = X##ga; \
1473 output[ 5] = X##ga; \
1474 state[ 6] = X##ge; \
1475 output[ 6] = X##ge; \
1476 state[ 7] = X##gi; \
1477 output[ 7] = X##gi; \
1478 if (laneCount < 12) { \
1479 if (laneCount < 10) { \
1480 state[ 8] = X##go; \
1481 if (laneCount >= 9) { \
1482 output[ 8] = X##go; \
1483 } \
1484 state[ 9] = X##gu; \
1485 state[10] = X##ka; \
1486 } \
1487 else { \
1488 state[ 8] = X##go; \
1489 output[ 8] = X##go; \
1490 state[ 9] = X##gu; \
1491 output[ 9] = X##gu; \
1492 state[10] = X##ka; \
1493 if (laneCount >= 11) { \
1494 output[10] = X##ka; \
1495 } \
1496 } \
1497 state[11] = X##ke; \
1498 state[12] = X##ki; \
1499 state[13] = X##ko; \
1500 state[14] = X##ku; \
1501 } \
1502 else { \
1503 state[ 8] = X##go; \
1504 output[ 8] = X##go; \
1505 state[ 9] = X##gu; \
1506 output[ 9] = X##gu; \
1507 state[10] = X##ka; \
1508 output[10] = X##ka; \
1509 state[11] = X##ke; \
1510 output[11] = X##ke; \
1511 if (laneCount < 14) { \
1512 state[12] = X##ki; \
1513 if (laneCount >= 13) { \
1514 output[12]= X##ki; \
1515 } \
1516 state[13] = X##ko; \
1517 state[14] = X##ku; \
1518 } \
1519 else { \
1520 state[12] = X##ki; \
1521 output[12]= X##ki; \
1522 state[13] = X##ko; \
1523 output[13] = X##ko; \
1524 state[14] = X##ku; \
1525 if (laneCount >= 15) { \
1526 output[14] = X##ku; \
1527 } \
1528 } \
1529 } \
1530 } \
1531 state[15] = X##ma; \
1532 state[16] = X##me; \
1533 state[17] = X##mi; \
1534 state[18] = X##mo; \
1535 state[19] = X##mu; \
1536 state[20] = X##sa; \
1537 state[21] = X##se; \
1538 state[22] = X##si; \
1539 state[23] = X##so; \
1540 state[24] = X##su; \
1541 } \
1542 else { \
1543 state[ 0] = X##ba; \
1544 output[ 0] = X##ba; \
1545 state[ 1] = X##be; \
1546 output[ 1] = X##be; \
1547 state[ 2] = X##bi; \
1548 output[ 2] = X##bi; \
1549 state[ 3] = X##bo; \
1550 output[ 3] = X##bo; \
1551 state[ 4] = X##bu; \
1552 output[ 4] = X##bu; \
1553 state[ 5] = X##ga; \
1554 output[ 5] = X##ga; \
1555 state[ 6] = X##ge; \
1556 output[ 6] = X##ge; \
1557 state[ 7] = X##gi; \
1558 output[ 7] = X##gi; \
1559 state[ 8] = X##go; \
1560 output[ 8] = X##go; \
1561 state[ 9] = X##gu; \
1562 output[ 9] = X##gu; \
1563 state[10] = X##ka; \
1564 output[10] = X##ka; \
1565 state[11] = X##ke; \
1566 output[11] = X##ke; \
1567 state[12] = X##ki; \
1568 output[12]= X##ki; \
1569 state[13] = X##ko; \
1570 output[13] = X##ko; \
1571 state[14] = X##ku; \
1572 output[14] = X##ku; \
1573 state[15] = X##ma; \
1574 output[15] = X##ma; \
1575 if (laneCount < 24) { \
1576 if (laneCount < 20) { \
1577 if (laneCount < 18) { \
1578 state[16] = X##me; \
1579 if (laneCount >= 17) { \
1580 output[16] = X##me; \
1581 } \
1582 state[17] = X##mi; \
1583 state[18] = X##mo; \
1584 } \
1585 else { \
1586 state[16] = X##me; \
1587 output[16] = X##me; \
1588 state[17] = X##mi; \
1589 output[17] = X##mi; \
1590 state[18] = X##mo; \
1591 if (laneCount >= 19) { \
1592 output[18] = X##mo; \
1593 } \
1594 } \
1595 state[19] = X##mu; \
1596 state[20] = X##sa; \
1597 state[21] = X##se; \
1598 state[22] = X##si; \
1599 } \
1600 else { \
1601 state[16] = X##me; \
1602 output[16] = X##me; \
1603 state[17] = X##mi; \
1604 output[17] = X##mi; \
1605 state[18] = X##mo; \
1606 output[18] = X##mo; \
1607 state[19] = X##mu; \
1608 output[19] = X##mu; \
1609 if (laneCount < 22) { \
1610 state[20] = X##sa; \
1611 if (laneCount >= 21) { \
1612 output[20] = X##sa; \
1613 } \
1614 state[21] = X##se; \
1615 state[22] = X##si; \
1616 } \
1617 else { \
1618 state[20] = X##sa; \
1619 output[20] = X##sa; \
1620 state[21] = X##se; \
1621 output[21] = X##se; \
1622 state[22] = X##si; \
1623 if (laneCount >= 23) { \
1624 output[22] = X##si; \
1625 } \
1626 } \
1627 } \
1628 state[23] = X##so; \
1629 state[24] = X##su; \
1630 } \
1631 else { \
1632 state[16] = X##me; \
1633 output[16] = X##me; \
1634 state[17] = X##mi; \
1635 output[17] = X##mi; \
1636 state[18] = X##mo; \
1637 output[18] = X##mo; \
1638 state[19] = X##mu; \
1639 output[19] = X##mu; \
1640 state[20] = X##sa; \
1641 output[20] = X##sa; \
1642 state[21] = X##se; \
1643 output[21] = X##se; \
1644 state[22] = X##si; \
1645 output[22] = X##si; \
1646 state[23] = X##so; \
1647 output[23] = X##so; \
1648 state[24] = X##su; \
1649 if (laneCount >= 25) { \
1650 output[24] = X##su; \
1651 } \
1652 } \
1653 }
1654
1655#define output(X, output, laneCount) \
1656 if (laneCount < 16) { \
1657 if (laneCount < 8) { \
1658 if (laneCount < 4) { \
1659 if (laneCount < 2) { \
1660 if (laneCount >= 1) { \
1661 output[ 0] = X##ba; \
1662 } \
1663 } \
1664 else { \
1665 output[ 0] = X##ba; \
1666 output[ 1] = X##be; \
1667 if (laneCount >= 3) { \
1668 output[ 2] = X##bi; \
1669 } \
1670 } \
1671 } \
1672 else { \
1673 output[ 0] = X##ba; \
1674 output[ 1] = X##be; \
1675 output[ 2] = X##bi; \
1676 output[ 3] = X##bo; \
1677 if (laneCount < 6) { \
1678 if (laneCount >= 5) { \
1679 output[ 4] = X##bu; \
1680 } \
1681 } \
1682 else { \
1683 output[ 4] = X##bu; \
1684 output[ 5] = X##ga; \
1685 if (laneCount >= 7) { \
1686 output[ 6] = X##ge; \
1687 } \
1688 } \
1689 } \
1690 } \
1691 else { \
1692 output[ 0] = X##ba; \
1693 output[ 1] = X##be; \
1694 output[ 2] = X##bi; \
1695 output[ 3] = X##bo; \
1696 output[ 4] = X##bu; \
1697 output[ 5] = X##ga; \
1698 output[ 6] = X##ge; \
1699 output[ 7] = X##gi; \
1700 if (laneCount < 12) { \
1701 if (laneCount < 10) { \
1702 if (laneCount >= 9) { \
1703 output[ 8] = X##go; \
1704 } \
1705 } \
1706 else { \
1707 output[ 8] = X##go; \
1708 output[ 9] = X##gu; \
1709 if (laneCount >= 11) { \
1710 output[10] = X##ka; \
1711 } \
1712 } \
1713 } \
1714 else { \
1715 output[ 8] = X##go; \
1716 output[ 9] = X##gu; \
1717 output[10] = X##ka; \
1718 output[11] = X##ke; \
1719 if (laneCount < 14) { \
1720 if (laneCount >= 13) { \
1721 output[12] = X##ki; \
1722 } \
1723 } \
1724 else { \
1725 output[12] = X##ki; \
1726 output[13] = X##ko; \
1727 if (laneCount >= 15) { \
1728 output[14] = X##ku; \
1729 } \
1730 } \
1731 } \
1732 } \
1733 } \
1734 else { \
1735 output[ 0] = X##ba; \
1736 output[ 1] = X##be; \
1737 output[ 2] = X##bi; \
1738 output[ 3] = X##bo; \
1739 output[ 4] = X##bu; \
1740 output[ 5] = X##ga; \
1741 output[ 6] = X##ge; \
1742 output[ 7] = X##gi; \
1743 output[ 8] = X##go; \
1744 output[ 9] = X##gu; \
1745 output[10] = X##ka; \
1746 output[11] = X##ke; \
1747 output[12] = X##ki; \
1748 output[13] = X##ko; \
1749 output[14] = X##ku; \
1750 output[15] = X##ma; \
1751 if (laneCount < 24) { \
1752 if (laneCount < 20) { \
1753 if (laneCount < 18) { \
1754 if (laneCount >= 17) { \
1755 output[16] = X##me; \
1756 } \
1757 } \
1758 else { \
1759 output[16] = X##me; \
1760 output[17] = X##mi; \
1761 if (laneCount >= 19) { \
1762 output[18] = X##mo; \
1763 } \
1764 } \
1765 } \
1766 else { \
1767 output[16] = X##me; \
1768 output[17] = X##mi; \
1769 output[18] = X##mo; \
1770 output[19] = X##mu; \
1771 if (laneCount < 22) { \
1772 if (laneCount >= 21) { \
1773 output[20] = X##sa; \
1774 } \
1775 } \
1776 else { \
1777 output[20] = X##sa; \
1778 output[21] = X##se; \
1779 if (laneCount >= 23) { \
1780 output[22] = X##si; \
1781 } \
1782 } \
1783 } \
1784 } \
1785 else { \
1786 output[16] = X##me; \
1787 output[17] = X##mi; \
1788 output[18] = X##mo; \
1789 output[19] = X##mu; \
1790 output[20] = X##sa; \
1791 output[21] = X##se; \
1792 output[22] = X##si; \
1793 output[23] = X##so; \
1794 if (laneCount >= 25) { \
1795 output[24] = X##su; \
1796 } \
1797 } \
1798 }
1799
1800#define wrapOne(X, input, output, index, name) \
1801 X##name ^= input[index]; \
1802 output[index] = X##name;
1803
1804#define wrapOneInvert(X, input, output, index, name) \
1805 X##name ^= input[index]; \
1806 output[index] = X##name;
1807
1808#define unwrapOne(X, input, output, index, name) \
1809 output[index] = input[index] ^ X##name; \
1810 X##name ^= output[index];
1811
1812#define unwrapOneInvert(X, input, output, index, name) \
1813 output[index] = input[index] ^ X##name; \
1814 X##name ^= output[index];
1815
1816#endif
1817
1818#define wrap(X, input, output, laneCount, trailingBits) \
1819 if (laneCount < 16) { \
1820 if (laneCount < 8) { \
1821 if (laneCount < 4) { \
1822 if (laneCount < 2) { \
1823 if (laneCount < 1) { \
1824 X##ba ^= trailingBits; \
1825 } \
1826 else { \
1827 wrapOne(X, input, output, 0, ba) \
1828 X##be ^= trailingBits; \
1829 } \
1830 } \
1831 else { \
1832 wrapOne(X, input, output, 0, ba) \
1833 wrapOneInvert(X, input, output, 1, be) \
1834 if (laneCount < 3) { \
1835 X##bi ^= trailingBits; \
1836 } \
1837 else { \
1838 wrapOneInvert(X, input, output, 2, bi) \
1839 X##bo ^= trailingBits; \
1840 } \
1841 } \
1842 } \
1843 else { \
1844 wrapOne(X, input, output, 0, ba) \
1845 wrapOneInvert(X, input, output, 1, be) \
1846 wrapOneInvert(X, input, output, 2, bi) \
1847 wrapOne(X, input, output, 3, bo) \
1848 if (laneCount < 6) { \
1849 if (laneCount < 5) { \
1850 X##bu ^= trailingBits; \
1851 } \
1852 else { \
1853 wrapOne(X, input, output, 4, bu) \
1854 X##ga ^= trailingBits; \
1855 } \
1856 } \
1857 else { \
1858 wrapOne(X, input, output, 4, bu) \
1859 wrapOne(X, input, output, 5, ga) \
1860 if (laneCount < 7) { \
1861 X##ge ^= trailingBits; \
1862 } \
1863 else { \
1864 wrapOne(X, input, output, 6, ge) \
1865 X##gi ^= trailingBits; \
1866 } \
1867 } \
1868 } \
1869 } \
1870 else { \
1871 wrapOne(X, input, output, 0, ba) \
1872 wrapOneInvert(X, input, output, 1, be) \
1873 wrapOneInvert(X, input, output, 2, bi) \
1874 wrapOne(X, input, output, 3, bo) \
1875 wrapOne(X, input, output, 4, bu) \
1876 wrapOne(X, input, output, 5, ga) \
1877 wrapOne(X, input, output, 6, ge) \
1878 wrapOne(X, input, output, 7, gi) \
1879 if (laneCount < 12) { \
1880 if (laneCount < 10) { \
1881 if (laneCount < 9) { \
1882 X##go ^= trailingBits; \
1883 } \
1884 else { \
1885 wrapOneInvert(X, input, output, 8, go) \
1886 X##gu ^= trailingBits; \
1887 } \
1888 } \
1889 else { \
1890 wrapOneInvert(X, input, output, 8, go) \
1891 wrapOne(X, input, output, 9, gu) \
1892 if (laneCount < 11) { \
1893 X##ka ^= trailingBits; \
1894 } \
1895 else { \
1896 wrapOne(X, input, output, 10, ka) \
1897 X##ke ^= trailingBits; \
1898 } \
1899 } \
1900 } \
1901 else { \
1902 wrapOneInvert(X, input, output, 8, go) \
1903 wrapOne(X, input, output, 9, gu) \
1904 wrapOne(X, input, output, 10, ka) \
1905 wrapOne(X, input, output, 11, ke) \
1906 if (laneCount < 14) { \
1907 if (laneCount < 13) { \
1908 X##ki ^= trailingBits; \
1909 } \
1910 else { \
1911 wrapOneInvert(X, input, output, 12, ki) \
1912 X##ko ^= trailingBits; \
1913 } \
1914 } \
1915 else { \
1916 wrapOneInvert(X, input, output, 12, ki) \
1917 wrapOne(X, input, output, 13, ko) \
1918 if (laneCount < 15) { \
1919 X##ku ^= trailingBits; \
1920 } \
1921 else { \
1922 wrapOne(X, input, output, 14, ku) \
1923 X##ma ^= trailingBits; \
1924 } \
1925 } \
1926 } \
1927 } \
1928 } \
1929 else { \
1930 wrapOne(X, input, output, 0, ba) \
1931 wrapOneInvert(X, input, output, 1, be) \
1932 wrapOneInvert(X, input, output, 2, bi) \
1933 wrapOne(X, input, output, 3, bo) \
1934 wrapOne(X, input, output, 4, bu) \
1935 wrapOne(X, input, output, 5, ga) \
1936 wrapOne(X, input, output, 6, ge) \
1937 wrapOne(X, input, output, 7, gi) \
1938 wrapOneInvert(X, input, output, 8, go) \
1939 wrapOne(X, input, output, 9, gu) \
1940 wrapOne(X, input, output, 10, ka) \
1941 wrapOne(X, input, output, 11, ke) \
1942 wrapOneInvert(X, input, output, 12, ki) \
1943 wrapOne(X, input, output, 13, ko) \
1944 wrapOne(X, input, output, 14, ku) \
1945 wrapOne(X, input, output, 15, ma) \
1946 if (laneCount < 24) { \
1947 if (laneCount < 20) { \
1948 if (laneCount < 18) { \
1949 if (laneCount < 17) { \
1950 X##me ^= trailingBits; \
1951 } \
1952 else { \
1953 wrapOne(X, input, output, 16, me) \
1954 X##mi ^= trailingBits; \
1955 } \
1956 } \
1957 else { \
1958 wrapOne(X, input, output, 16, me) \
1959 wrapOneInvert(X, input, output, 17, mi) \
1960 if (laneCount < 19) { \
1961 X##mo ^= trailingBits; \
1962 } \
1963 else { \
1964 wrapOne(X, input, output, 18, mo) \
1965 X##mu ^= trailingBits; \
1966 } \
1967 } \
1968 } \
1969 else { \
1970 wrapOne(X, input, output, 16, me) \
1971 wrapOneInvert(X, input, output, 17, mi) \
1972 wrapOne(X, input, output, 18, mo) \
1973 wrapOne(X, input, output, 19, mu) \
1974 if (laneCount < 22) { \
1975 if (laneCount < 21) { \
1976 X##sa ^= trailingBits; \
1977 } \
1978 else { \
1979 wrapOneInvert(X, input, output, 20, sa) \
1980 X##se ^= trailingBits; \
1981 } \
1982 } \
1983 else { \
1984 wrapOneInvert(X, input, output, 20, sa) \
1985 wrapOne(X, input, output, 21, se) \
1986 if (laneCount < 23) { \
1987 X##si ^= trailingBits; \
1988 } \
1989 else { \
1990 wrapOne(X, input, output, 22, si) \
1991 X##so ^= trailingBits; \
1992 } \
1993 } \
1994 } \
1995 } \
1996 else { \
1997 wrapOne(X, input, output, 16, me) \
1998 wrapOneInvert(X, input, output, 17, mi) \
1999 wrapOne(X, input, output, 18, mo) \
2000 wrapOne(X, input, output, 19, mu) \
2001 wrapOneInvert(X, input, output, 20, sa) \
2002 wrapOne(X, input, output, 21, se) \
2003 wrapOne(X, input, output, 22, si) \
2004 wrapOne(X, input, output, 23, so) \
2005 if (laneCount < 25) { \
2006 X##su ^= trailingBits; \
2007 } \
2008 else { \
2009 wrapOne(X, input, output, 24, su) \
2010 } \
2011 } \
2012 }
2013
2014#define unwrap(X, input, output, laneCount, trailingBits) \
2015 if (laneCount < 16) { \
2016 if (laneCount < 8) { \
2017 if (laneCount < 4) { \
2018 if (laneCount < 2) { \
2019 if (laneCount < 1) { \
2020 X##ba ^= trailingBits; \
2021 } \
2022 else { \
2023 unwrapOne(X, input, output, 0, ba) \
2024 X##be ^= trailingBits; \
2025 } \
2026 } \
2027 else { \
2028 unwrapOne(X, input, output, 0, ba) \
2029 unwrapOneInvert(X, input, output, 1, be) \
2030 if (laneCount < 3) { \
2031 X##bi ^= trailingBits; \
2032 } \
2033 else { \
2034 unwrapOneInvert(X, input, output, 2, bi) \
2035 X##bo ^= trailingBits; \
2036 } \
2037 } \
2038 } \
2039 else { \
2040 unwrapOne(X, input, output, 0, ba) \
2041 unwrapOneInvert(X, input, output, 1, be) \
2042 unwrapOneInvert(X, input, output, 2, bi) \
2043 unwrapOne(X, input, output, 3, bo) \
2044 if (laneCount < 6) { \
2045 if (laneCount < 5) { \
2046 X##bu ^= trailingBits; \
2047 } \
2048 else { \
2049 unwrapOne(X, input, output, 4, bu) \
2050 X##ga ^= trailingBits; \
2051 } \
2052 } \
2053 else { \
2054 unwrapOne(X, input, output, 4, bu) \
2055 unwrapOne(X, input, output, 5, ga) \
2056 if (laneCount < 7) { \
2057 X##ge ^= trailingBits; \
2058 } \
2059 else { \
2060 unwrapOne(X, input, output, 6, ge) \
2061 X##gi ^= trailingBits; \
2062 } \
2063 } \
2064 } \
2065 } \
2066 else { \
2067 unwrapOne(X, input, output, 0, ba) \
2068 unwrapOneInvert(X, input, output, 1, be) \
2069 unwrapOneInvert(X, input, output, 2, bi) \
2070 unwrapOne(X, input, output, 3, bo) \
2071 unwrapOne(X, input, output, 4, bu) \
2072 unwrapOne(X, input, output, 5, ga) \
2073 unwrapOne(X, input, output, 6, ge) \
2074 unwrapOne(X, input, output, 7, gi) \
2075 if (laneCount < 12) { \
2076 if (laneCount < 10) { \
2077 if (laneCount < 9) { \
2078 X##go ^= trailingBits; \
2079 } \
2080 else { \
2081 unwrapOneInvert(X, input, output, 8, go) \
2082 X##gu ^= trailingBits; \
2083 } \
2084 } \
2085 else { \
2086 unwrapOneInvert(X, input, output, 8, go) \
2087 unwrapOne(X, input, output, 9, gu) \
2088 if (laneCount < 11) { \
2089 X##ka ^= trailingBits; \
2090 } \
2091 else { \
2092 unwrapOne(X, input, output, 10, ka) \
2093 X##ke ^= trailingBits; \
2094 } \
2095 } \
2096 } \
2097 else { \
2098 unwrapOneInvert(X, input, output, 8, go) \
2099 unwrapOne(X, input, output, 9, gu) \
2100 unwrapOne(X, input, output, 10, ka) \
2101 unwrapOne(X, input, output, 11, ke) \
2102 if (laneCount < 14) { \
2103 if (laneCount < 13) { \
2104 X##ki ^= trailingBits; \
2105 } \
2106 else { \
2107 unwrapOneInvert(X, input, output, 12, ki) \
2108 X##ko ^= trailingBits; \
2109 } \
2110 } \
2111 else { \
2112 unwrapOneInvert(X, input, output, 12, ki) \
2113 unwrapOne(X, input, output, 13, ko) \
2114 if (laneCount < 15) { \
2115 X##ku ^= trailingBits; \
2116 } \
2117 else { \
2118 unwrapOne(X, input, output, 14, ku) \
2119 X##ma ^= trailingBits; \
2120 } \
2121 } \
2122 } \
2123 } \
2124 } \
2125 else { \
2126 unwrapOne(X, input, output, 0, ba) \
2127 unwrapOneInvert(X, input, output, 1, be) \
2128 unwrapOneInvert(X, input, output, 2, bi) \
2129 unwrapOne(X, input, output, 3, bo) \
2130 unwrapOne(X, input, output, 4, bu) \
2131 unwrapOne(X, input, output, 5, ga) \
2132 unwrapOne(X, input, output, 6, ge) \
2133 unwrapOne(X, input, output, 7, gi) \
2134 unwrapOneInvert(X, input, output, 8, go) \
2135 unwrapOne(X, input, output, 9, gu) \
2136 unwrapOne(X, input, output, 10, ka) \
2137 unwrapOne(X, input, output, 11, ke) \
2138 unwrapOneInvert(X, input, output, 12, ki) \
2139 unwrapOne(X, input, output, 13, ko) \
2140 unwrapOne(X, input, output, 14, ku) \
2141 unwrapOne(X, input, output, 15, ma) \
2142 if (laneCount < 24) { \
2143 if (laneCount < 20) { \
2144 if (laneCount < 18) { \
2145 if (laneCount < 17) { \
2146 X##me ^= trailingBits; \
2147 } \
2148 else { \
2149 unwrapOne(X, input, output, 16, me) \
2150 X##mi ^= trailingBits; \
2151 } \
2152 } \
2153 else { \
2154 unwrapOne(X, input, output, 16, me) \
2155 unwrapOneInvert(X, input, output, 17, mi) \
2156 if (laneCount < 19) { \
2157 X##mo ^= trailingBits; \
2158 } \
2159 else { \
2160 unwrapOne(X, input, output, 18, mo) \
2161 X##mu ^= trailingBits; \
2162 } \
2163 } \
2164 } \
2165 else { \
2166 unwrapOne(X, input, output, 16, me) \
2167 unwrapOneInvert(X, input, output, 17, mi) \
2168 unwrapOne(X, input, output, 18, mo) \
2169 unwrapOne(X, input, output, 19, mu) \
2170 if (laneCount < 22) { \
2171 if (laneCount < 21) { \
2172 X##sa ^= trailingBits; \
2173 } \
2174 else { \
2175 unwrapOneInvert(X, input, output, 20, sa) \
2176 X##se ^= trailingBits; \
2177 } \
2178 } \
2179 else { \
2180 unwrapOneInvert(X, input, output, 20, sa) \
2181 unwrapOne(X, input, output, 21, se) \
2182 if (laneCount < 23) { \
2183 X##si ^= trailingBits; \
2184 } \
2185 else { \
2186 unwrapOne(X, input, output, 22, si) \
2187 X##so ^= trailingBits; \
2188 } \
2189 } \
2190 } \
2191 } \
2192 else { \
2193 unwrapOne(X, input, output, 16, me) \
2194 unwrapOneInvert(X, input, output, 17, mi) \
2195 unwrapOne(X, input, output, 18, mo) \
2196 unwrapOne(X, input, output, 19, mu) \
2197 unwrapOneInvert(X, input, output, 20, sa) \
2198 unwrapOne(X, input, output, 21, se) \
2199 unwrapOne(X, input, output, 22, si) \
2200 unwrapOne(X, input, output, 23, so) \
2201 if (laneCount < 25) { \
2202 X##su ^= trailingBits; \
2203 } \
2204 else { \
2205 unwrapOne(X, input, output, 24, su) \
2206 } \
2207 } \
2208 }
2209