/* Do not include ANY system headers here. The implementation is */
/* somehow flawed - maybe something gets overlaid by definitions */
/* in the system headers - and results will become incorrect.    */
4 | |
5 | #include "config_xor.h" |
6 | |
7 | #if defined(TIGER_64_BIT) |
8 | |
/*
 * For GCC 4.x releases newer than 4.6, request optimization level O1
 * for the code that follows in this file.
 * NOTE(review): presumably a workaround for wrong results at higher
 * optimization levels -- confirm before removing.
 */
#if defined(GCC_VERSION_MAJOR)
#if ((GCC_VERSION_MAJOR == 4) && (GCC_VERSION_MINOR > 6))
#pragma GCC optimize ("O1")
#endif
#endif
14 | |
15 | /* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */ |
16 | |
/*
 * Keep USE_MEMSET switched off in this file; note that no header
 * declaring memcpy()/memset() is included here.
 */
#undef USE_MEMSET

/*
 * Big endian: mirror WORDS_BIGENDIAN (presumably set by config_xor.h)
 * under the local name BIG_ENDIAN.
 * NOTE(review): the bare name BIG_ENDIAN is also defined by common
 * system headers (e.g. <endian.h>) regardless of the host byte order,
 * so anything tested with "#ifdef BIG_ENDIAN" would misbehave if such
 * a header were ever included -- confirm before adding includes.
 */
#ifdef WORDS_BIGENDIAN
#define BIG_ENDIAN
#endif
23 | |
24 | /* Tiger: A Fast New Hash Function |
25 | * |
26 | * Ross Anderson and Eli Biham |
27 | * |
28 | * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/): |
29 | * |
30 | * Tiger has no usage restrictions nor patents. It can be used freely, |
31 | * with the reference implementation, with other implementations or with |
32 | * a modification to the reference implementation (as long as it still |
33 | * implements Tiger). We only ask you to let us know about your |
34 | * implementation and to cite the origin of Tiger and of the reference |
35 | * implementation. |
36 | * |
37 | * |
38 | * The authors' home pages can be found both in |
39 | * http://www.cs.technion.ac.il/~biham/ and in |
40 | * http://www.cl.cam.ac.uk/users/rja14/. |
41 | * The authors' email addresses are biham@cs.technion.ac.il |
42 | * and rja14@cl.cam.ac.uk. |
43 | */ |
44 | |
/*
 * word64: unsigned 64-bit integer used for the hash state and the
 * message words.  Selected by HAVE_LONG_64 / HAVE_LONG_LONG_64
 * (presumably provided by config_xor.h); compilation stops if neither
 * macro is defined.
 */
#if defined(HAVE_LONG_64)
typedef unsigned long int word64;
#elif defined(HAVE_LONG_LONG_64)
typedef unsigned long long int word64;
#else
#error No 64 bit type found !
#endif

/*
 * sh_word32: unsigned 32-bit integer, selected the same way from
 * HAVE_INT_32 / HAVE_LONG_32 / HAVE_SHORT_32.
 * NOTE(review): not referenced by the code visible in this file.
 */
#if defined(HAVE_INT_32)
typedef unsigned int sh_word32;
#elif defined(HAVE_LONG_32)
typedef unsigned long sh_word32;
#elif defined(HAVE_SHORT_32)
typedef unsigned short sh_word32;
#else
#error No 32 bit type found !
#endif

/* sh_byte: a single raw byte; used for byte-wise access to the input. */
typedef unsigned char sh_byte;
64 | |
/*
 * Choose the implementation of the round macros: with TIGER_OPT_ASM the
 * inline-assembler variants further down are compiled, otherwise
 * TIGER_C enables the plain C variants.
 * NOTE(review): TIGER_ASM64_2 is defined here but not tested anywhere
 * in the visible part of this file.
 */
#if defined(TIGER_OPT_ASM)
#define TIGER_ASM64_2 1
#else
#define TIGER_C 1
#endif
70 | |
/* The number of passes of the hash function. */
/* Three passes are recommended. */
/* Use four passes when you need extra security. */
/* Must be at least three. */
/* The first three passes are written out explicitly in the compress */
/* macros; only passes beyond the third run in their for loop. */
#define PASSES 3
76 | |
/* Lookup table with 4*256 64-bit entries; only declared here, the */
/* definition lives elsewhere. */
extern word64 tiger_table[4*256];

/* Volatile can help if compiler is smart enough to use memory operand */
/* The two constants mixed into the message words by the key schedule. */
static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;

/* The four consecutive 256-entry quarters of tiger_table. */
#define t1 (tiger_table)
#define t2 (tiger_table+256)
#define t3 (tiger_table+256*2)
#define t4 (tiger_table+256*3)
87 | |
/* Hooks expanded at the start and end of pass5/pass7/pass9. */
/* Both are currently empty. */
#define pass_start
#define pass_end



/* Copy the working state a, b, c into aa, bb, cc so that feedforward */
/* can combine it with the result later.  Expects all six variables */
/* to be in scope at the point of use. */
#define save_abc \
  aa = a; \
  bb = b; \
  cc = c;
97 | |
#ifdef TIGER_C

/* Byte number n of x; n = 0 selects the least significant byte. */
#define BN(x,n) (((x)>>((n)*8))&0xFF)


/* Depending on outer code one of these two can be better*/
/*
 * Core of one round (plain C variant):
 *   - xor the message word x into c,
 *   - subtract from a the xor of the table entries selected by the
 *     even-numbered bytes of c,
 *   - add to b the xor of the table entries selected by the
 *     odd-numbered bytes of c.
 * Expands to several statements and modifies a, b and c in place, so
 * it must not be used as the body of an unbraced if/for/while.
 */
#define roundX(a,b,c,x) \
  c ^= x; \
  a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ \
       t3[BN(c,4)] ^ t4[BN(c,6)] ; \
  b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ \
       t2[BN(c,5)] ^ t1[BN(c,7)] ;

/* Full rounds: roundX followed by multiplying b by 5, 7 or 9. */
#define round5(a,b,c,x) roundX(a,b,c,x) b = b+b*4;
#define round7(a,b,c,x) roundX(a,b,c,x) b = b*8-b;
#define round9(a,b,c,x) roundX(a,b,c,x) b = b+b*8;

#endif
116 | |
117 | |
#ifdef TIGER_OPT_ASM

/* Byte-selection masks; they are handed to the asm statement as */
/* operands by roundend. */
#define MASK0 0xFFL
#define MASK8 0xFF00L
#define MASK16 0xFF0000L
#define MASK32 0xFF00000000LL
#define MASK40 0xFF0000000000LL
#define MASK48 0xFF000000000000LL

/* Opens the asm statement.  roundX supplies the instruction template */
/* and roundend supplies the operand lists and the closing ");". */
#define roundstart __asm__ (

/* a will be moved into different reg each round
 * using register substitution feature of GCC asm
 * b will be moved in 2-nd pass rounds only
 */


/*
 * Operand and clobber lists shared by all rounds.  Operand numbers as
 * seen by the template:
 *   %0, %1, %2    a, b, c (read/write registers)
 *   %3, %4, %5    a, b, c again as plain inputs (the roundX template
 *                 does not reference them)
 *   %6            x, as a memory operand
 *   %7            address of tiger_table
 *   %8, %9, %10   MASK0, MASK8, MASK16 as immediates (the template
 *                 only references %10)
 *   %11, %12, %13 MASK32, MASK40, MASK48 in registers
 */
#define roundend(a,b,c,x) \
  : "+r" (a), "+r" (b), "+r" (c) \
  : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
    "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
  : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" );


/* c ^= x;
   a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
   t3[BN(c,4)] ^ t4[BN(c,6)] ;
   b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
   t2[BN(c,5)] ^ t1[BN(c,7)] ; */

/*
 * Instruction template for the round core described by the C fragment
 * in the comment above.  The masked bytes of c are shifted by
 * (bit position - 3), which leaves each index already multiplied by 8,
 * the size of one table entry.  The displacements 2048*1, 2048*2 and
 * 2048*3 are 256 entries of 8 bytes each, i.e. the second, third and
 * fourth quarter of tiger_table.
 */
#define roundX(a,b,c,x) \
" movl %10, %%ebx \n"\
" movq %11, %%rcx \n"\
" movq %13, %%rdx \n"\
" movq %6, %%r8 \n"\
" xorq %%r8, %2 \n" \
" andq %2, %%rbx \n"\
" andq %2, %%rcx \n"\
" andq %2, %%rdx \n"\
" shrl $(16-3), %%ebx \n"\
" shrq $(32-3), %%rcx \n"\
" shrq $(48-3), %%rdx \n"\
" movzbl %2b, %%eax \n"\
" movzwl %2w, %%edi \n"\
" movq (%7,%%rax,8), %%rsi \n"\
" shrl $(8), %%edi \n" \
" movq %2, %%rax \n" \
" xorq (2048*1)(%7,%%rbx), %%rsi \n"\
" movq %2, %%rbx \n"\
" shrl $24, %%eax \n"\
" andq %12, %%rbx \n"\
" xorq (2048*2)(%7,%%rcx), %%rsi \n"\
" shrq $(40-3), %%rbx \n"\
" movq %2, %%rcx \n"\
" xorq (2048*3)(%7,%%rdx), %%rsi \n"\
" movq (2048*3)(%7,%%rdi,8), %%rdx \n"\
" shrq $56, %%rcx \n"\
" xorq (2048*2)(%7,%%rax,8), %%rdx \n"\
" xorq (2048*1)(%7,%%rbx), %%rdx \n" \
" subq %%rsi, %0 \n"\
" xorq (%7,%%rcx,8), %%rdx \n"\
" addq %%rdx, %1 \n"

/* Round core followed by b *= 5, done inside the same asm statement */
/* with a single leaq. */
#define round5(a,b,c,x) \
  roundstart \
  roundX(a,b,c,x) \
  /* b*=5; */ \
  "leaq (%1,%1,4), %1\n" \
  roundend(a,b,c,x)


/*
 * Round core followed by b *= 7.  The multiplication is a second asm
 * statement: it forms 9*b in the output register, doubles the input
 * and subtracts it (9b - 2b).
 * NOTE(review): that second template writes to %1, which is declared
 * as an input-only operand -- check against the GCC extended-asm
 * rules.
 */
#define round7(a,b,c,x) \
  roundstart \
  roundX(a,b,c,x) \
  roundend(a,b,c,x) \
  /* b*=7; */ \
  __asm__ ( \
  "leaq (%1,%1,8), %0\n" \
  "addq %1, %1 \n" \
  "subq %1, %0 " \
  :"=&r" (b): "r"(b): "1" );

/* Round core followed by b *= 9, done inside the same asm statement */
/* with a single leaq. */
#define round9(a,b,c,x) \
  roundstart \
  roundX(a,b,c,x) \
  "leaq (%1,%1,8), %1\n" \
  roundend(a,b,c,x)

#endif
207 | |
208 | |
209 | |
210 | |
211 | /* ============== Common macros ================== */ |
212 | |
/*
 * Key schedule: rewrites the eight message words x0..x7 in place.
 * It makes two sweeps over x0..x7; the first starts by mixing in
 * XOR_CONST1, the second ends by mixing in XOR_CONST2.  Every
 * statement uses the result of the previous one, so the order must
 * not be changed.  Expects x0..x7 to be word64 variables in scope.
 */
#define key_schedule \
  x0 -= x7 ^ XOR_CONST1; \
  x1 ^= x0; \
  x2 += x1;\
  x3 -= x2 ^ ((~x1)<<19);\
  x4 ^= x3;\
  x5 += x4;\
  x6 -= x5 ^ ((~x4)>>23); \
  x7 ^= x6; \
  x0 += x7; \
  x1 -= x0 ^ ((~x7)<<19); \
  x2 ^= x1; \
  x3 += x2; \
  x4 -= x3 ^ ((~x2)>>23); \
  x5 ^= x4; \
  x6 += x5; \
  x7 -= x6 ^ XOR_CONST2;
230 | |
/*
 * pass5n: eight round5 rounds over x0..x7 with the key schedule folded
 * in.  Each statement of the first key-schedule sweep is placed
 * directly after the round that consumed the word it rewrites; the
 * second sweep follows after the last round.  The statements are the
 * same as in pass5 followed by key_schedule, in an order that gives
 * the same result.
 */
#define pass5n(a,b,c) \
  round5(a,b,c,x0) \
  x0 -= x7 ^ XOR_CONST1; \
  round5(b,c,a,x1) \
  x1 ^= x0; \
  round5(c,a,b,x2) \
  x2 += x1; \
  round5(a,b,c,x3) \
  x3 -= x2 ^ ((~x1)<<19); \
  round5(b,c,a,x4) \
  x4 ^= x3; \
  round5(c,a,b,x5) \
  x5 += x4; \
  round5(a,b,c,x6) \
  x6 -= x5 ^ ((~x4)>>23); \
  round5(b,c,a,x7) \
  x7 ^= x6; \
  x0 += x7; \
  x1 -= x0 ^ ((~x7)<<19); \
  x2 ^= x1; \
  x3 += x2; \
  x4 -= x3 ^ ((~x2)>>23); \
  x5 ^= x4; \
  x6 += x5; \
  x7 -= x6 ^ XOR_CONST2;

/*
 * pass7n: same layout as pass5n, but built from round7 rounds.
 */
#define pass7n(a,b,c) \
  round7(a,b,c,x0) \
  x0 -= x7 ^ XOR_CONST1; \
  round7(b,c,a,x1) \
  x1 ^= x0; \
  round7(c,a,b,x2) \
  x2 += x1; \
  round7(a,b,c,x3) \
  x3 -= x2 ^ ((~x1)<<19); \
  round7(b,c,a,x4) \
  x4 ^= x3; \
  round7(c,a,b,x5) \
  x5 += x4; \
  round7(a,b,c,x6) \
  x6 -= x5 ^ ((~x4)>>23); \
  round7(b,c,a,x7) \
  x7 ^= x6; \
  x0 += x7; \
  x1 -= x0 ^ ((~x7)<<19); \
  x2 ^= x1; \
  x3 += x2; \
  x4 -= x3 ^ ((~x2)>>23); \
  x5 ^= x4; \
  x6 += x5; \
  x7 -= x6 ^ XOR_CONST2;
282 | |
/*
 * pass5 / pass7 / pass9: one pass of eight rounds, one round per
 * message word x0..x7, using round5, round7 or round9 respectively.
 * The three state arguments are rotated from round to round:
 * (a,b,c), (b,c,a), (c,a,b), and so on.  No key schedule is included.
 */
#define pass5(a,b,c) \
  pass_start \
  round5(a,b,c,x0) \
  round5(b,c,a,x1) \
  round5(c,a,b,x2) \
  round5(a,b,c,x3) \
  round5(b,c,a,x4) \
  round5(c,a,b,x5) \
  round5(a,b,c,x6) \
  round5(b,c,a,x7) \
  pass_end

#define pass7(a,b,c) \
  pass_start \
  round7(a,b,c,x0) \
  round7(b,c,a,x1) \
  round7(c,a,b,x2) \
  round7(a,b,c,x3) \
  round7(b,c,a,x4) \
  round7(c,a,b,x5) \
  round7(a,b,c,x6) \
  round7(b,c,a,x7) \
  pass_end


#define pass9(a,b,c) \
  pass_start \
  round9(a,b,c,x0) \
  round9(b,c,a,x1) \
  round9(c,a,b,x2) \
  round9(a,b,c,x3) \
  round9(b,c,a,x4) \
  round9(c,a,b,x5) \
  round9(a,b,c,x6) \
  round9(b,c,a,x7) \
  pass_end
319 | |
/* Combine the state saved by save_abc with the state after the */
/* passes: xor into a, subtract from b, add to c. */
#define feedforward \
  a ^= aa; \
  b -= bb; \
  c += cc;
324 | |
325 | |
/* This version works ok with C variant and also with new asm version
 * that just wastes a register r8
 * reason? who knows, write forwarding is faster than keeping value
 * in register? :)
 */
/*
 * The compression function as a statement sequence: save the state,
 * run a round5 pass and a round7 pass (each with the key schedule
 * folded in), then a round9 pass, and finally feed the saved state
 * forward.  The for loop only runs for passes beyond the third, i.e.
 * not at all while PASSES is 3.  Expects a, b, c, aa, bb, cc, tmpa,
 * pass_no and x0..x7 to be in scope.
 */
#define compress \
  save_abc \
  pass5n(a,b,c) \
  pass7n(c,a,b) \
  pass9(b,c,a) \
  for(pass_no=3; pass_no<PASSES; pass_no++) { \
    key_schedule \
    pass9(a,b,c) \
    tmpa=a; a=c; c=b; b=tmpa; \
  } \
  feedforward
342 | |
/*
 * Variant of compress that keeps the passes and the key schedule as
 * separate steps (pass5, key_schedule, pass7, key_schedule, pass9)
 * instead of using the interleaved pass5n/pass7n macros.
 * NOTE(review): not referenced by the code visible in this file.
 */
#define compress_old \
  save_abc \
  pass5(a,b,c) \
  key_schedule \
  pass7(c,a,b) \
  key_schedule \
  pass9(b,c,a) \
  for(pass_no=3; pass_no<PASSES; pass_no++) { \
    key_schedule \
    pass9(a,b,c) \
    tmpa=a; a=c; c=b; b=tmpa; \
  } \
  feedforward
356 | |
/*
 * Body of the compression function as a brace-enclosed block.
 *   str   - expression indexable as str[0] .. str[7]: the eight
 *           64-bit message words of one block (only read)
 *   state - expression indexable as state[0] .. state[2]: the running
 *           hash state, loaded at the start and stored back at the end
 * The block declares every local variable the compress macro expects.
 */
#define tiger_compress_macro(str, state) \
{ \
  register word64 a, b, c; \
  register word64 tmpa; \
  word64 aa, bb, cc; \
  word64 x0, x1, x2, x3, x4, x5, x6, x7; \
  int pass_no; \
\
  a = state[0]; \
  b = state[1]; \
  c = state[2]; \
\
  x0=str[0]; x1=str[1]; x2=str[2]; x3=str[3]; \
  x4=str[4]; x5=str[5]; x6=str[6]; x7=str[7]; \
\
  compress; \
\
  state[0] = a; \
  state[1] = b; \
  state[2] = c; \
}
378 | |
379 | void tiger_compress(const word64 *str, word64 state[3]) |
380 | { |
381 | tiger_compress_macro(((word64*)str), ((word64*)state)); |
382 | } |
383 | |
384 | void tiger_t(const word64 *str, word64 length, word64 res[3]) |
385 | { |
386 | register word64 i; |
387 | |
388 | #ifdef BIG_ENDIAN |
389 | register word64 j = 0; |
390 | unsigned char temp[64]; |
391 | #endif |
392 | |
393 | /* |
394 | * res[0]=0x0123456789ABCDEFLL; |
395 | * res[1]=0xFEDCBA9876543210LL; |
396 | * res[2]=0xF096A5B4C3B2E187LL; |
397 | */ |
398 | |
399 | for(i=length; i>=64; i-=64) |
400 | { |
401 | #ifdef BIG_ENDIAN |
402 | for(j=0; j<64; j++) |
403 | temp[j^7] = ((sh_byte*)str)[j]; |
404 | tiger_compress(((word64*)temp), res); |
405 | #else |
406 | tiger_compress(str, res); |
407 | #endif |
408 | str += 8; |
409 | } |
410 | } |
411 | |
412 | void tiger(const word64 *str, word64 length, word64 res[3]) |
413 | { |
414 | register word64 i; |
415 | register word64 j = 0; |
416 | unsigned char temp[64]; |
417 | union { |
418 | word64 itmp; |
419 | unsigned char ctmp[8]; |
420 | } uu; |
421 | |
422 | /* |
423 | * res[0]=0x0123456789ABCDEFLL; |
424 | * res[1]=0xFEDCBA9876543210LL; |
425 | * res[2]=0xF096A5B4C3B2E187LL; |
426 | */ |
427 | |
428 | for(i=length; i>=64; i-=64) |
429 | { |
430 | #ifdef BIG_ENDIAN |
431 | for(j=0; j<64; j++) |
432 | temp[j^7] = ((sh_byte*)str)[j]; |
433 | tiger_compress(((word64*)temp), res); |
434 | #else |
435 | tiger_compress(str, res); |
436 | #endif |
437 | str += 8; |
438 | } |
439 | |
440 | #ifdef BIG_ENDIAN |
441 | for(j=0; j<i; j++) |
442 | temp[j^7] = ((sh_byte*)str)[j]; |
443 | |
444 | temp[j^7] = 0x01; |
445 | j++; |
446 | for(; j&7; j++) |
447 | temp[j^7] = 0; |
448 | #else |
449 | |
450 | #ifndef USE_MEMSET |
451 | for(j=0; j<i; j++) |
452 | temp[j] = ((sh_byte*)str)[j]; |
453 | #else |
454 | memcpy( temp, str, j=i ); |
455 | #endif |
456 | temp[j++] = 0x01; |
457 | for(; j&7; j++) |
458 | temp[j] = 0; |
459 | |
460 | #endif |
461 | |
462 | if(j>56) |
463 | { |
464 | #ifndef USE_MEMSET |
465 | for(; j<64; j++) |
466 | temp[j] = 0; |
467 | #else |
468 | memset( temp+j, 0, 64-j); |
469 | #endif |
470 | tiger_compress(((word64*)temp), res); |
471 | j=0; |
472 | } |
473 | |
474 | #ifndef USE_MEMSET |
475 | for(; j<56; j++) |
476 | temp[j] = 0; |
477 | #else |
478 | memset( temp+j, 0, 56-j); |
479 | #endif |
480 | |
481 | /* Avoid gcc warning for type-punned pointer |
482 | */ |
483 | uu.itmp = ((word64)length)<<3; |
484 | for (j=0; j<8; j++) |
485 | temp[56+j] = uu.ctmp[j]; |
486 | |
487 | tiger_compress(((word64*)temp), res); |
488 | } |
489 | |
490 | #endif |
