1 | /* Do not include ANY system headers here. The implementation is */ |
---|
2 | /* somehow flawed - maybe something gets overlaid by definitions */ |
---|
3 | /* in the system headers. Results will become incorrect. */ |
---|
4 | |
---|
5 | #include "config_xor.h" |
---|
6 | |
---|
7 | #if defined(TIGER_64_BIT) |
---|
8 | |
---|
9 | #if defined(GCC_VERSION_MAJOR) |
---|
10 | #if ((GCC_VERSION_MAJOR == 4) && (GCC_VERSION_MINOR > 6)) |
---|
11 | #pragma GCC optimize ("O1") |
---|
12 | #endif |
---|
13 | #endif |
---|
14 | |
---|
15 | /* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */ |
---|
16 | |
---|
17 | #undef USE_MEMSET |
---|
18 | |
---|
19 | /* Big endian: */ |
---|
20 | #ifdef WORDS_BIGENDIAN |
---|
21 | #define BIG_ENDIAN |
---|
22 | #endif |
---|
23 | |
---|
24 | /* Tiger: A Fast New Hash Function |
---|
25 | * |
---|
26 | * Ross Anderson and Eli Biham |
---|
27 | * |
---|
28 | * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/): |
---|
29 | * |
---|
30 | * Tiger has no usage restrictions nor patents. It can be used freely, |
---|
31 | * with the reference implementation, with other implementations or with |
---|
32 | * a modification to the reference implementation (as long as it still |
---|
33 | * implements Tiger). We only ask you to let us know about your |
---|
34 | * implementation and to cite the origin of Tiger and of the reference |
---|
35 | * implementation. |
---|
36 | * |
---|
37 | * |
---|
38 | * The authors' home pages can be found both in |
---|
39 | * http://www.cs.technion.ac.il/~biham/ and in |
---|
40 | * http://www.cl.cam.ac.uk/users/rja14/. |
---|
41 | * The authors' email addresses are biham@cs.technion.ac.il |
---|
42 | * and rja14@cl.cam.ac.uk. |
---|
43 | */ |
---|
44 | |
---|
45 | #if defined(HAVE_LONG_64) |
---|
46 | typedef unsigned long int word64; |
---|
47 | #elif defined(HAVE_LONG_LONG_64) |
---|
48 | typedef unsigned long long int word64; |
---|
49 | #else |
---|
50 | #error No 64 bit type found ! |
---|
51 | #endif |
---|
52 | |
---|
53 | #if defined(HAVE_INT_32) |
---|
54 | typedef unsigned int sh_word32; |
---|
55 | #elif defined(HAVE_LONG_32) |
---|
56 | typedef unsigned long sh_word32; |
---|
57 | #elif defined(HAVE_SHORT_32) |
---|
58 | typedef unsigned short sh_word32; |
---|
59 | #else |
---|
60 | #error No 32 bit type found ! |
---|
61 | #endif |
---|
62 | |
---|
63 | typedef unsigned char sh_byte; |
---|
64 | |
---|
65 | #if defined(TIGER_OPT_ASM) |
---|
66 | #define TIGER_ASM64_2 1 |
---|
67 | #else |
---|
68 | #define TIGER_C 1 |
---|
69 | #endif |
---|
70 | |
---|
71 | /* The number of passes of the hash function. */ |
---|
72 | /* Three passes are recommended. */ |
---|
73 | /* Use four passes when you need extra security. */ |
---|
74 | /* Must be at least three. */ |
---|
75 | #define PASSES 3 |
---|
76 | |
---|
77 | extern word64 tiger_table[4*256]; |
---|
78 | |
---|
79 | /* Volatile can help if compiler is smart enough to use memory operand */ |
---|
80 | static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL; |
---|
81 | static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL; |
---|
82 | |
---|
83 | #define t1 (tiger_table) |
---|
84 | #define t2 (tiger_table+256) |
---|
85 | #define t3 (tiger_table+256*2) |
---|
86 | #define t4 (tiger_table+256*3) |
---|
87 | |
---|
88 | #define pass_start |
---|
89 | #define pass_end |
---|
90 | |
---|
91 | |
---|
92 | |
---|
93 | #define save_abc \ |
---|
94 | aa = a; \ |
---|
95 | bb = b; \ |
---|
96 | cc = c; |
---|
97 | |
---|
98 | #ifdef TIGER_C |
---|
99 | |
---|
/* Byte number n of the 64-bit value x; n == 0 selects the least significant byte. */
#define BN(x,n) (((x) >> (8*(n))) & 0xFF)

/*
 * Body shared by all rounds (plain C variant).
 *
 * c is first mixed with the message word x.  The even-numbered bytes of
 * the new c select S-box entries (t1..t4) whose XOR is subtracted from a;
 * the odd-numbered bytes select the entries whose XOR is added to b.
 * The expansion ends with a ';' so that callers can chain rounds without
 * writing separators.
 */
#define roundX(a,b,c,x) \
      c ^= x; \
      a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ t3[BN(c,4)] ^ t4[BN(c,6)]; \
      b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ t2[BN(c,5)] ^ t1[BN(c,7)];

/* One round followed by the per-pass multiplication of b by 5, 7 or 9. */
#define round5(a,b,c,x) roundX(a,b,c,x) b *= 5;
#define round7(a,b,c,x) roundX(a,b,c,x) b *= 7;
#define round9(a,b,c,x) roundX(a,b,c,x) b *= 9;
---|
114 | |
---|
115 | #endif |
---|
116 | |
---|
117 | |
---|
118 | #ifdef TIGER_OPT_ASM |
---|
119 | |
---|
120 | #define MASK0 0xFFL |
---|
121 | #define MASK8 0xFF00L |
---|
122 | #define MASK16 0xFF0000L |
---|
123 | #define MASK32 0xFF00000000LL |
---|
124 | #define MASK40 0xFF0000000000LL |
---|
125 | #define MASK48 0xFF000000000000LL |
---|
126 | |
---|
127 | #define roundstart __asm__ ( |
---|
128 | |
---|
129 | /* a will be moved into different reg each round |
---|
130 | * using register substitution feature of GCC asm |
---|
131 | * b will be moved in 2-nd pass rounds only |
---|
132 | */ |
---|
133 | |
---|
134 | |
---|
/*
 * Operand, constraint and clobber lists that close the __asm__ statement
 * opened by roundstart.
 *
 * Operand numbers as they are used by the roundX assembler template:
 *   %0, %1, %2   a, b, c (read-write register operands)
 *   %3, %4, %5   a, b, c listed a second time as inputs; the template
 *                does not reference these numbers
 *   %6           x, as a memory operand
 *   %7           address of tiger_table
 *   %8  .. %10   MASK0, MASK8, MASK16 (immediates)
 *   %11 .. %13   MASK32, MASK40, MASK48 (register operands)
 *
 * NOTE(review): the clobber list starts with the entry "3" and names
 * %edi where the other scratch registers are given by their 64-bit
 * names - confirm both against the GCC extended-asm documentation.
 */
135 | #define roundend(a,b,c,x) \ |
---|
136 | : "+r" (a), "+r" (b), "+r" (c) \ |
---|
137 | : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\ |
---|
138 | "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \ |
---|
139 | : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" ); |
---|
140 | |
---|
141 | |
---|
142 | /* c ^= x; |
---|
143 | a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ |
---|
144 | t3[BN(c,4)] ^ t4[BN(c,6)] ; |
---|
145 | b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ |
---|
146 | t2[BN(c,5)] ^ t1[BN(c,7)] ; */ |
---|
147 | |
---|
148 | #define roundX(a,b,c,x) \ |
---|
149 | " movl %10, %%ebx \n"\ |
---|
150 | " movq %11, %%rcx \n"\ |
---|
151 | " movq %13, %%rdx \n"\ |
---|
152 | " movq %6, %%r8 \n"\ |
---|
153 | " xorq %%r8, %2 \n" \ |
---|
154 | " andq %2, %%rbx \n"\ |
---|
155 | " andq %2, %%rcx \n"\ |
---|
156 | " andq %2, %%rdx \n"\ |
---|
157 | " shrl $(16-3), %%ebx \n"\ |
---|
158 | " shrq $(32-3), %%rcx \n"\ |
---|
159 | " shrq $(48-3), %%rdx \n"\ |
---|
160 | " movzbl %2b, %%eax \n"\ |
---|
161 | " movzwl %2w, %%edi \n"\ |
---|
162 | " movq (%7,%%rax,8), %%rsi \n"\ |
---|
163 | " shrl $(8), %%edi \n" \ |
---|
164 | " movq %2, %%rax \n" \ |
---|
165 | " xorq (2048*1)(%7,%%rbx), %%rsi \n"\ |
---|
166 | " movq %2, %%rbx \n"\ |
---|
167 | " shrl $24, %%eax \n"\ |
---|
168 | " andq %12, %%rbx \n"\ |
---|
169 | " xorq (2048*2)(%7,%%rcx), %%rsi \n"\ |
---|
170 | " shrq $(40-3), %%rbx \n"\ |
---|
171 | " movq %2, %%rcx \n"\ |
---|
172 | " xorq (2048*3)(%7,%%rdx), %%rsi \n"\ |
---|
173 | " movq (2048*3)(%7,%%rdi,8), %%rdx \n"\ |
---|
174 | " shrq $56, %%rcx \n"\ |
---|
175 | " xorq (2048*2)(%7,%%rax,8), %%rdx \n"\ |
---|
176 | " xorq (2048*1)(%7,%%rbx), %%rdx \n" \ |
---|
177 | " subq %%rsi, %0 \n"\ |
---|
178 | " xorq (%7,%%rcx,8), %%rdx \n"\ |
---|
179 | " addq %%rdx, %1 \n" |
---|
180 | |
---|
181 | #define round5(a,b,c,x) \ |
---|
182 | roundstart \ |
---|
183 | roundX(a,b,c,x) \ |
---|
184 | /* b*=5; */ \ |
---|
185 | "leaq (%1,%1,4), %1\n" \ |
---|
186 | roundend(a,b,c,x) |
---|
187 | |
---|
188 | |
---|
/*
 * Assembler variant of round7: the shared round body, then b *= 7.
 *
 * Unlike round5 and round9, the multiplication is a second __asm__
 * statement.  leaq forms 9*b in the output register (%0), addq doubles
 * the input register (%1) to 2*b, and subq leaves 9*b - 2*b = 7*b in %0.
 *
 * NOTE(review): addq overwrites the register bound to the input operand,
 * and "1" appears in the clobber list, presumably to signal that -
 * confirm this is accepted by every supported compiler.
 */
189 | #define round7(a,b,c,x) \ |
---|
190 | roundstart \ |
---|
191 | roundX(a,b,c,x) \ |
---|
192 | roundend(a,b,c,x) \ |
---|
193 | /* b*=7; */ \ |
---|
194 | __asm__ ( \ |
---|
195 | "leaq (%1,%1,8), %0\n" \ |
---|
196 | "addq %1, %1 \n" \ |
---|
197 | "subq %1, %0 " \ |
---|
198 | :"=&r" (b): "r"(b): "1" ); |
---|
199 | |
---|
200 | #define round9(a,b,c,x) \ |
---|
201 | roundstart \ |
---|
202 | roundX(a,b,c,x) \ |
---|
203 | "leaq (%1,%1,8), %1\n" \ |
---|
204 | roundend(a,b,c,x) |
---|
205 | |
---|
206 | #endif |
---|
207 | |
---|
208 | |
---|
209 | |
---|
210 | |
---|
211 | /* ============== Common macros ================== */ |
---|
212 | |
---|
/*
 * Key schedule: transforms the eight message words x0..x7 in place.
 *
 * Two sweeps over x0..x7; every statement depends on the word updated
 * just before it, so the order must not be changed.  Expects x0..x7,
 * XOR_CONST1 and XOR_CONST2 to be in scope at the point of use.  The
 * expansion ends with a ';'.
 */
#define key_schedule \
      /* first sweep */ \
      x0 = x0 - (x7 ^ XOR_CONST1); \
      x1 = x1 ^ x0; \
      x2 = x2 + x1; \
      x3 = x3 - (x2 ^ ((~x1) << 19)); \
      x4 = x4 ^ x3; \
      x5 = x5 + x4; \
      x6 = x6 - (x5 ^ ((~x4) >> 23)); \
      x7 = x7 ^ x6; \
      /* second sweep */ \
      x0 = x0 + x7; \
      x1 = x1 - (x0 ^ ((~x7) << 19)); \
      x2 = x2 ^ x1; \
      x3 = x3 + x2; \
      x4 = x4 - (x3 ^ ((~x2) >> 23)); \
      x5 = x5 ^ x4; \
      x6 = x6 + x5; \
      x7 = x7 - (x6 ^ XOR_CONST2);
---|
230 | |
---|
/*
 * First pass (multiplier 5) with the key schedule folded in.
 *
 * The eight rounds consume x0..x7 in order, rotating the roles of the
 * three state arguments from round to round.  Directly after a round has
 * used a message word, the first-sweep key-schedule statement for that
 * word is executed; the second sweep follows the last round.  The
 * key-schedule statements are the same as in the key_schedule macro, in
 * the same relative order, so on exit x0..x7 hold the scheduled words.
 */
231 | #define pass5n(a,b,c) \ |
---|
232 | round5(a,b,c,x0) \ |
---|
233 | x0 -= x7 ^ XOR_CONST1; \ |
---|
234 | round5(b,c,a,x1) \ |
---|
235 | x1 ^= x0; \ |
---|
236 | round5(c,a,b,x2) \ |
---|
237 | x2 += x1; \ |
---|
238 | round5(a,b,c,x3) \ |
---|
239 | x3 -= x2 ^ ((~x1)<<19); \ |
---|
240 | round5(b,c,a,x4) \ |
---|
241 | x4 ^= x3; \ |
---|
242 | round5(c,a,b,x5) \ |
---|
243 | x5 += x4; \ |
---|
244 | round5(a,b,c,x6) \ |
---|
245 | x6 -= x5 ^ ((~x4)>>23); \ |
---|
246 | round5(b,c,a,x7) \ |
---|
247 | x7 ^= x6; \ |
---|
248 | x0 += x7; \ |
---|
249 | x1 -= x0 ^ ((~x7)<<19); \ |
---|
250 | x2 ^= x1; \ |
---|
251 | x3 += x2; \ |
---|
252 | x4 -= x3 ^ ((~x2)>>23); \ |
---|
253 | x5 ^= x4; \ |
---|
254 | x6 += x5; \ |
---|
255 | x7 -= x6 ^ XOR_CONST2; |
---|
256 | |
---|
/*
 * Second pass (multiplier 7) with the key schedule folded in.
 *
 * Eight round7 invocations consume x0..x7 in order; after each round the
 * first-sweep key-schedule statement for the word just used is executed,
 * and the second sweep follows the last round.  On exit x0..x7 hold the
 * scheduled words for the following pass.
 */
257 | #define pass7n(a,b,c) \ |
---|
258 | round7(a,b,c,x0) \ |
---|
259 | x0 -= x7 ^ XOR_CONST1; \ |
---|
260 | round7(b,c,a,x1) \ |
---|
261 | x1 ^= x0; \ |
---|
262 | round7(c,a,b,x2) \ |
---|
263 | x2 += x1; \ |
---|
264 | round7(a,b,c,x3) \ |
---|
265 | x3 -= x2 ^ ((~x1)<<19); \ |
---|
266 | round7(b,c,a,x4) \ |
---|
267 | x4 ^= x3; \ |
---|
268 | round7(c,a,b,x5) \ |
---|
269 | x5 += x4; \ |
---|
270 | round7(a,b,c,x6) \ |
---|
271 | x6 -= x5 ^ ((~x4)>>23); \ |
---|
272 | round7(b,c,a,x7) \ |
---|
273 | x7 ^= x6; \ |
---|
274 | x0 += x7; \ |
---|
275 | x1 -= x0 ^ ((~x7)<<19); \ |
---|
276 | x2 ^= x1; \ |
---|
277 | x3 += x2; \ |
---|
278 | x4 -= x3 ^ ((~x2)>>23); \ |
---|
279 | x5 ^= x4; \ |
---|
280 | x6 += x5; \ |
---|
281 | x7 -= x6 ^ XOR_CONST2; |
---|
282 | |
---|
283 | #define pass5(a,b,c) \ |
---|
284 | pass_start \ |
---|
285 | round5(a,b,c,x0) \ |
---|
286 | round5(b,c,a,x1) \ |
---|
287 | round5(c,a,b,x2) \ |
---|
288 | round5(a,b,c,x3) \ |
---|
289 | round5(b,c,a,x4) \ |
---|
290 | round5(c,a,b,x5) \ |
---|
291 | round5(a,b,c,x6) \ |
---|
292 | round5(b,c,a,x7) \ |
---|
293 | pass_end |
---|
294 | |
---|
295 | #define pass7(a,b,c) \ |
---|
296 | pass_start \ |
---|
297 | round7(a,b,c,x0) \ |
---|
298 | round7(b,c,a,x1) \ |
---|
299 | round7(c,a,b,x2) \ |
---|
300 | round7(a,b,c,x3) \ |
---|
301 | round7(b,c,a,x4) \ |
---|
302 | round7(c,a,b,x5) \ |
---|
303 | round7(a,b,c,x6) \ |
---|
304 | round7(b,c,a,x7) \ |
---|
305 | pass_end |
---|
306 | |
---|
307 | |
---|
308 | #define pass9(a,b,c) \ |
---|
309 | pass_start \ |
---|
310 | round9(a,b,c,x0) \ |
---|
311 | round9(b,c,a,x1) \ |
---|
312 | round9(c,a,b,x2) \ |
---|
313 | round9(a,b,c,x3) \ |
---|
314 | round9(b,c,a,x4) \ |
---|
315 | round9(c,a,b,x5) \ |
---|
316 | round9(a,b,c,x6) \ |
---|
317 | round9(b,c,a,x7) \ |
---|
318 | pass_end |
---|
319 | |
---|
320 | #define feedforward \ |
---|
321 | a ^= aa; \ |
---|
322 | b -= bb; \ |
---|
323 | c += cc; |
---|
324 | |
---|
325 | |
---|
326 | /* This version works ok with C variant and also with new asm version |
---|
327 | * that just wastes a register r8 |
---|
328 | * reason? who knows, write forwarding is faster than keeping value |
---|
329 | * in register? :) |
---|
330 | */ |
---|
/*
 * Complete compression of one block, operating on the locals a, b, c,
 * x0..x7, aa, bb, cc, tmpa and pass_no of the enclosing scope.
 *
 *   save_abc      keep a copy of the incoming state in aa, bb, cc
 *   pass5n/pass7n passes one and two; each also performs the key
 *                 schedule, so no separate key_schedule is needed here
 *   pass9         pass three
 *   for loop      further passes, executed only when PASSES > 3: key
 *                 schedule, another pass9, then a rotation of a, b, c
 *   feedforward   combine the result with the saved state
 *
 * The state arguments are passed in rotated order from pass to pass.
 */
331 | #define compress \ |
---|
332 | save_abc \ |
---|
333 | pass5n(a,b,c) \ |
---|
334 | pass7n(c,a,b) \ |
---|
335 | pass9(b,c,a) \ |
---|
336 | for(pass_no=3; pass_no<PASSES; pass_no++) { \ |
---|
337 | key_schedule \ |
---|
338 | pass9(a,b,c) \ |
---|
339 | tmpa=a; a=c; c=b; b=tmpa; \ |
---|
340 | } \ |
---|
341 | feedforward |
---|
342 | |
---|
343 | #define compress_old \ |
---|
344 | save_abc \ |
---|
345 | pass5(a,b,c) \ |
---|
346 | key_schedule \ |
---|
347 | pass7(c,a,b) \ |
---|
348 | key_schedule \ |
---|
349 | pass9(b,c,a) \ |
---|
350 | for(pass_no=3; pass_no<PASSES; pass_no++) { \ |
---|
351 | key_schedule \ |
---|
352 | pass9(a,b,c) \ |
---|
353 | tmpa=a; a=c; c=b; b=tmpa; \ |
---|
354 | } \ |
---|
355 | feedforward |
---|
356 | |
---|
/*
 * Compress one block into the chaining state.
 *
 *   str    expression indexable as str[0] .. str[7]: the eight 64-bit
 *          message words of the block (only read)
 *   state  expression indexable as state[0] .. state[2]: the chaining
 *          value, read at the start and overwritten at the end
 *
 * Expands to a brace-enclosed block that declares every local the
 * compress macro and its helpers refer to by name (a, b, c, tmpa, aa, bb,
 * cc, x0..x7, pass_no); these names must therefore not be changed.
 * tmpa and pass_no are only used by the extra-pass loop inside compress.
 */
357 | #define tiger_compress_macro(str, state) \ |
---|
358 | { \ |
---|
359 | register word64 a, b, c; \ |
---|
360 | register word64 tmpa; \ |
---|
361 | word64 aa, bb, cc; \ |
---|
362 | word64 x0, x1, x2, x3, x4, x5, x6, x7; \ |
---|
363 | int pass_no; \ |
---|
364 | \ |
---|
365 | a = state[0]; \ |
---|
366 | b = state[1]; \ |
---|
367 | c = state[2]; \ |
---|
368 | \ |
---|
369 | x0=str[0]; x1=str[1]; x2=str[2]; x3=str[3]; \ |
---|
370 | x4=str[4]; x5=str[5]; x6=str[6]; x7=str[7]; \ |
---|
371 | \ |
---|
372 | compress; \ |
---|
373 | \ |
---|
374 | state[0] = a; \ |
---|
375 | state[1] = b; \ |
---|
376 | state[2] = c; \ |
---|
377 | } |
---|
378 | |
---|
379 | void tiger_compress(const word64 *str, word64 state[3]) |
---|
380 | { |
---|
381 | tiger_compress_macro(((word64*)str), ((word64*)state)); |
---|
382 | } |
---|
383 | |
---|
384 | void tiger_t(const word64 *str, word64 length, word64 res[3]) |
---|
385 | { |
---|
386 | register word64 i; |
---|
387 | |
---|
388 | #ifdef BIG_ENDIAN |
---|
389 | register word64 j = 0; |
---|
390 | unsigned char temp[64]; |
---|
391 | #endif |
---|
392 | |
---|
393 | /* |
---|
394 | * res[0]=0x0123456789ABCDEFLL; |
---|
395 | * res[1]=0xFEDCBA9876543210LL; |
---|
396 | * res[2]=0xF096A5B4C3B2E187LL; |
---|
397 | */ |
---|
398 | |
---|
399 | for(i=length; i>=64; i-=64) |
---|
400 | { |
---|
401 | #ifdef BIG_ENDIAN |
---|
402 | for(j=0; j<64; j++) |
---|
403 | temp[j^7] = ((sh_byte*)str)[j]; |
---|
404 | tiger_compress(((word64*)temp), res); |
---|
405 | #else |
---|
406 | tiger_compress(str, res); |
---|
407 | #endif |
---|
408 | str += 8; |
---|
409 | } |
---|
410 | } |
---|
411 | |
---|
412 | void tiger(const word64 *str, word64 length, word64 res[3]) |
---|
413 | { |
---|
414 | register word64 i; |
---|
415 | register word64 j = 0; |
---|
416 | unsigned char temp[64]; |
---|
417 | union { |
---|
418 | word64 itmp; |
---|
419 | unsigned char ctmp[8]; |
---|
420 | } uu; |
---|
421 | |
---|
422 | /* |
---|
423 | * res[0]=0x0123456789ABCDEFLL; |
---|
424 | * res[1]=0xFEDCBA9876543210LL; |
---|
425 | * res[2]=0xF096A5B4C3B2E187LL; |
---|
426 | */ |
---|
427 | |
---|
428 | for(i=length; i>=64; i-=64) |
---|
429 | { |
---|
430 | #ifdef BIG_ENDIAN |
---|
431 | for(j=0; j<64; j++) |
---|
432 | temp[j^7] = ((sh_byte*)str)[j]; |
---|
433 | tiger_compress(((word64*)temp), res); |
---|
434 | #else |
---|
435 | tiger_compress(str, res); |
---|
436 | #endif |
---|
437 | str += 8; |
---|
438 | } |
---|
439 | |
---|
440 | #ifdef BIG_ENDIAN |
---|
441 | for(j=0; j<i; j++) |
---|
442 | temp[j^7] = ((sh_byte*)str)[j]; |
---|
443 | |
---|
444 | temp[j^7] = 0x01; |
---|
445 | j++; |
---|
446 | for(; j&7; j++) |
---|
447 | temp[j^7] = 0; |
---|
448 | #else |
---|
449 | |
---|
450 | #ifndef USE_MEMSET |
---|
451 | for(j=0; j<i; j++) |
---|
452 | temp[j] = ((sh_byte*)str)[j]; |
---|
453 | #else |
---|
454 | memcpy( temp, str, j=i ); |
---|
455 | #endif |
---|
456 | temp[j++] = 0x01; |
---|
457 | for(; j&7; j++) |
---|
458 | temp[j] = 0; |
---|
459 | |
---|
460 | #endif |
---|
461 | |
---|
462 | if(j>56) |
---|
463 | { |
---|
464 | #ifndef USE_MEMSET |
---|
465 | for(; j<64; j++) |
---|
466 | temp[j] = 0; |
---|
467 | #else |
---|
468 | memset( temp+j, 0, 64-j); |
---|
469 | #endif |
---|
470 | tiger_compress(((word64*)temp), res); |
---|
471 | j=0; |
---|
472 | } |
---|
473 | |
---|
474 | #ifndef USE_MEMSET |
---|
475 | for(; j<56; j++) |
---|
476 | temp[j] = 0; |
---|
477 | #else |
---|
478 | memset( temp+j, 0, 56-j); |
---|
479 | #endif |
---|
480 | |
---|
481 | /* Avoid gcc warning for type-punned pointer |
---|
482 | */ |
---|
483 | uu.itmp = ((word64)length)<<3; |
---|
484 | for (j=0; j<8; j++) |
---|
485 | temp[56+j] = uu.ctmp[j]; |
---|
486 | |
---|
487 | tiger_compress(((word64*)temp), res); |
---|
488 | } |
---|
489 | |
---|
490 | #endif |
---|