/* Do not include ANY system headers here. The implementation is */
/* somehow flawed - maybe something gets overlaid by definitions */
/* in the system headers. Results will become incorrect.         */
4 |
|
---|
5 | #include "config_xor.h"
|
---|
6 |
|
---|
7 | #if defined(TIGER_64_BIT)
|
---|
8 |
|
---|
/*
 * Cap the optimisation level at O1 for GCC 4.7 and later 4.x releases.
 * NOTE(review): presumably a workaround for wrong results at higher
 * optimisation levels - confirm before removing.
 */
#ifdef GCC_VERSION_MAJOR
#if GCC_VERSION_MAJOR == 4 && GCC_VERSION_MINOR >= 7
#pragma GCC optimize ("O1")
#endif
#endif

/* Make sure the byte-loop variants are selected instead of memcpy()/memset(). */
#undef USE_MEMSET

/* Translate the configure-time result into the name tested further down. */
#if defined(WORDS_BIGENDIAN)
#define BIG_ENDIAN
#endif
23 |
|
---|
24 | /* Tiger: A Fast New Hash Function
|
---|
25 | *
|
---|
26 | * Ross Anderson and Eli Biham
|
---|
27 | *
|
---|
28 | * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
|
---|
29 | *
|
---|
30 | * Tiger has no usage restrictions nor patents. It can be used freely,
|
---|
31 | * with the reference implementation, with other implementations or with
|
---|
32 | * a modification to the reference implementation (as long as it still
|
---|
33 | * implements Tiger). We only ask you to let us know about your
|
---|
34 | * implementation and to cite the origin of Tiger and of the reference
|
---|
35 | * implementation.
|
---|
36 | *
|
---|
37 | *
|
---|
38 | * The authors' home pages can be found both in
|
---|
39 | * http://www.cs.technion.ac.il/~biham/ and in
|
---|
40 | * http://www.cl.cam.ac.uk/users/rja14/.
|
---|
41 | * The authors' email addresses are biham@cs.technion.ac.il
|
---|
42 | * and rja14@cl.cam.ac.uk.
|
---|
43 | */
|
---|
44 |
|
---|
45 | #if defined(HAVE_LONG_64)
|
---|
46 | typedef unsigned long int word64;
|
---|
47 | #elif defined(HAVE_LONG_LONG_64)
|
---|
48 | typedef unsigned long long int word64;
|
---|
49 | #else
|
---|
50 | #error No 64 bit type found !
|
---|
51 | #endif
|
---|
52 |
|
---|
53 | #if defined(HAVE_INT_32)
|
---|
54 | typedef unsigned int sh_word32;
|
---|
55 | #elif defined(HAVE_LONG_32)
|
---|
56 | typedef unsigned long sh_word32;
|
---|
57 | #elif defined(HAVE_SHORT_32)
|
---|
58 | typedef unsigned short sh_word32;
|
---|
59 | #else
|
---|
60 | #error No 32 bit type found !
|
---|
61 | #endif
|
---|
62 |
|
---|
63 | typedef unsigned char sh_byte;
|
---|
64 |
|
---|
/* Select the round implementation: plain C unless TIGER_OPT_ASM is requested. */
#ifndef TIGER_OPT_ASM
#define TIGER_C 1
#else
#define TIGER_ASM64_2 1
#endif
70 |
|
---|
/*
 * The number of passes of the hash function.
 * Three passes are recommended; use four passes when you need extra
 * security. Must be at least three: the first three passes are expanded
 * unconditionally, only additional ones are looped over.
 */
#define PASSES 3

/* Enforce the documented lower bound at compile time. */
#if PASSES < 3
#error PASSES must be at least 3
#endif
76 |
|
---|
/*
 * Lookup table defined outside this file: 4 * 256 word64 entries.
 * The t1..t4 macros address it as four consecutive 256-entry sub-tables.
 */
extern word64 tiger_table[4*256];
78 |
|
---|
79 | /* Volatile can help if compiler is smart enough to use memory operand */
|
---|
80 | static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
|
---|
81 | static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
|
---|
82 |
|
---|
/* The four 256-entry sub-tables that live back to back in tiger_table. */
#define t1 (tiger_table)
#define t2 (&tiger_table[1 * 256])
#define t3 (&tiger_table[2 * 256])
#define t4 (&tiger_table[3 * 256])
87 |
|
---|
/*
 * Hooks expanded at the beginning and end of the pass5/pass7/pass9 macros.
 * Both are defined empty here, so they contribute no code.
 */
#define pass_start
#define pass_end
90 |
|
---|
91 |
|
---|
92 |
|
---|
/* Snapshot a, b, c into aa, bb, cc; feedforward combines them back in later. */
#define save_abc \
    aa = a, bb = b, cc = c;
97 |
|
---|
98 | #ifdef TIGER_C
|
---|
99 |
|
---|
/* BN(x,n): byte number n of x, where byte 0 is the least significant one. */
#define BN(x,n) (((x) >> (8 * (n))) & 0xFF)

/*
 * roundX: one Tiger round without the final multiplication.
 *   c is XORed with the message word x,
 *   a is decreased by the XOR of the table entries selected by the even
 *     bytes of c,
 *   b is increased by the XOR of the table entries selected by the odd
 *     bytes of c.
 * The arguments are expanded as lvalues and must be plain variables.
 */
#define roundX(a,b,c,x) \
    c = c ^ (x); \
    a = a - (t1[BN(c,0)] ^ t2[BN(c,2)] ^ t3[BN(c,4)] ^ t4[BN(c,6)]); \
    b = b + (t4[BN(c,1)] ^ t3[BN(c,3)] ^ t2[BN(c,5)] ^ t1[BN(c,7)]);

/* roundN: roundX followed by multiplying b by N (modulo 2^64). */
#define round5(a,b,c,x) roundX(a,b,c,x) b *= 5;
#define round7(a,b,c,x) roundX(a,b,c,x) b *= 7;
#define round9(a,b,c,x) roundX(a,b,c,x) b *= 9;
114 |
|
---|
115 | #endif
|
---|
116 |
|
---|
117 |
|
---|
118 | #ifdef TIGER_OPT_ASM
|
---|
119 |
|
---|
/*
 * Byte masks for the inline-asm round; MASKn selects the byte that starts
 * at bit n. They are passed to the asm statement as operands by roundend().
 */
#define MASK0 0xFFL
#define MASK8 0xFF00L
#define MASK16 0xFF0000L
#define MASK32 0xFF00000000LL
#define MASK40 0xFF0000000000LL
#define MASK48 0xFF000000000000LL

/* Opens the asm statement of one round; roundend() supplies the closing part. */
#define roundstart __asm__ (

/* a will be moved into different reg each round
 * using register substitution feature of GCC asm
 * b will be moved in 2-nd pass rounds only
 */

/*
 * Closes the asm statement opened by roundstart and binds its operands.
 * Operand numbers as referenced by the roundX template:
 *   %0, %1, %2  a, b, c (read-write register operands)
 *   %3, %4, %5  a, b, c again, as plain inputs
 *   %6          x, as a memory operand
 *   %7          address of tiger_table
 *   %8  .. %10  MASK0, MASK8, MASK16 (immediates)
 *   %11 .. %13  MASK32, MASK40, MASK48 (registers)
 * NOTE(review): the clobber list names "3" and mixes "%edi" with 64-bit
 * register names - check against GCC's Extended Asm rules.
 */
#define roundend(a,b,c,x) \
 : "+r" (a), "+r" (b), "+r" (c) \
 : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
 "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
 : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" );
140 |
|
---|
141 |
|
---|
/*
 * Inline-asm body of one round (AT&T syntax); the C equivalent is:
 *
 *   c ^= x;
 *   a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
 *        t3[BN(c,4)] ^ t4[BN(c,6)] ;
 *   b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
 *        t2[BN(c,5)] ^ t1[BN(c,7)] ;
 *
 * Operand numbers are those bound by roundend(): %0/%1/%2 are a/b/c,
 * %6 is x, %7 the table base, %10..%13 the MASK16/32/40/48 values.
 * %rsi accumulates the value subtracted from a, %rdx the value added to b.
 * Bytes extracted by mask are shifted right by (bit offset - 3) so they are
 * already scaled by 8, the entry size; 2048*k is the byte offset of the
 * k-th following 256-entry sub-table.
 * NOTE(review): "%2b" / "%2w" append a literal suffix to the register name
 * of operand 2; presumably this only assembles when c lives in r8..r15,
 * which the clobber list in roundend() appears to force - TODO confirm.
 */
#define roundX(a,b,c,x) \
" movl %10, %%ebx \n"\
" movq %11, %%rcx \n"\
" movq %13, %%rdx \n"\
" movq %6, %%r8 \n"\
" xorq %%r8, %2 \n" \
" andq %2, %%rbx \n"\
" andq %2, %%rcx \n"\
" andq %2, %%rdx \n"\
" shrl $(16-3), %%ebx \n"\
" shrq $(32-3), %%rcx \n"\
" shrq $(48-3), %%rdx \n"\
" movzbl %2b, %%eax \n"\
" movzwl %2w, %%edi \n"\
" movq (%7,%%rax,8), %%rsi \n"\
" shrl $(8), %%edi \n" \
" movq %2, %%rax \n" \
" xorq (2048*1)(%7,%%rbx), %%rsi \n"\
" movq %2, %%rbx \n"\
" shrl $24, %%eax \n"\
" andq %12, %%rbx \n"\
" xorq (2048*2)(%7,%%rcx), %%rsi \n"\
" shrq $(40-3), %%rbx \n"\
" movq %2, %%rcx \n"\
" xorq (2048*3)(%7,%%rdx), %%rsi \n"\
" movq (2048*3)(%7,%%rdi,8), %%rdx \n"\
" shrq $56, %%rcx \n"\
" xorq (2048*2)(%7,%%rax,8), %%rdx \n"\
" xorq (2048*1)(%7,%%rbx), %%rdx \n" \
" subq %%rsi, %0 \n"\
" xorq (%7,%%rcx,8), %%rdx \n"\
" addq %%rdx, %1 \n"
180 |
|
---|
/*
 * round5: asm round followed, inside the same asm statement, by b *= 5
 * (leaq computes %1 + 4 * %1).
 */
#define round5(a,b,c,x) \
 roundstart \
 roundX(a,b,c,x) \
 /* b*=5; */ \
 "leaq (%1,%1,4), %1\n" \
 roundend(a,b,c,x)


/*
 * round7: asm round, then a second asm statement computing b *= 7 as
 * 9 * b - 2 * b.
 * NOTE(review): the second statement modifies %1, which is declared as an
 * input-only operand, and lists "1" as a clobber - confirm this is valid
 * for the targeted GCC versions.
 */
#define round7(a,b,c,x) \
 roundstart \
 roundX(a,b,c,x) \
 roundend(a,b,c,x) \
 /* b*=7; */ \
 __asm__ ( \
 "leaq (%1,%1,8), %0\n" \
 "addq %1, %1 \n" \
 "subq %1, %0 " \
 :"=&r" (b): "r"(b): "1" );

/*
 * round9: asm round followed, inside the same asm statement, by b *= 9
 * (leaq computes %1 + 8 * %1).
 */
#define round9(a,b,c,x) \
 roundstart \
 roundX(a,b,c,x) \
 "leaq (%1,%1,8), %1\n" \
 roundend(a,b,c,x)
205 |
|
---|
206 | #endif
|
---|
207 |
|
---|
208 |
|
---|
209 |
|
---|
210 |
|
---|
211 | /* ============== Common macros ================== */
|
---|
212 |
|
---|
/*
 * key_schedule: transform the eight message words x0..x7 in place between
 * passes. The statements are order dependent: every step reads words that
 * earlier steps have already updated.
 */
#define key_schedule \
    x0 = x0 - (x7 ^ XOR_CONST1); \
    x1 = x1 ^ x0; \
    x2 = x2 + x1; \
    x3 = x3 - (x2 ^ (~x1 << 19)); \
    x4 = x4 ^ x3; \
    x5 = x5 + x4; \
    x6 = x6 - (x5 ^ (~x4 >> 23)); \
    x7 = x7 ^ x6; \
    x0 = x0 + x7; \
    x1 = x1 - (x0 ^ (~x7 << 19)); \
    x2 = x2 ^ x1; \
    x3 = x3 + x2; \
    x4 = x4 - (x3 ^ (~x2 >> 23)); \
    x5 = x5 ^ x4; \
    x6 = x6 + x5; \
    x7 = x7 - (x6 ^ XOR_CONST2);
230 |
|
---|
/*
 * pass5n: eight round5 steps over x0..x7 with the key schedule for the
 * next pass folded in. The statements between and after the rounds are
 * the key_schedule statements, in the same order; each xN is only updated
 * after the round that consumes it.
 */
#define pass5n(a,b,c) \
 round5(a,b,c,x0) \
 x0 -= x7 ^ XOR_CONST1; \
 round5(b,c,a,x1) \
 x1 ^= x0; \
 round5(c,a,b,x2) \
 x2 += x1; \
 round5(a,b,c,x3) \
 x3 -= x2 ^ ((~x1)<<19); \
 round5(b,c,a,x4) \
 x4 ^= x3; \
 round5(c,a,b,x5) \
 x5 += x4; \
 round5(a,b,c,x6) \
 x6 -= x5 ^ ((~x4)>>23); \
 round5(b,c,a,x7) \
 x7 ^= x6; \
 x0 += x7; \
 x1 -= x0 ^ ((~x7)<<19); \
 x2 ^= x1; \
 x3 += x2; \
 x4 -= x3 ^ ((~x2)>>23); \
 x5 ^= x4; \
 x6 += x5; \
 x7 -= x6 ^ XOR_CONST2;
256 |
|
---|
/*
 * pass7n: eight round7 steps over x0..x7 with the key schedule for the
 * next pass folded in. The statements between and after the rounds are
 * the key_schedule statements, in the same order; each xN is only updated
 * after the round that consumes it.
 */
#define pass7n(a,b,c) \
 round7(a,b,c,x0) \
 x0 -= x7 ^ XOR_CONST1; \
 round7(b,c,a,x1) \
 x1 ^= x0; \
 round7(c,a,b,x2) \
 x2 += x1; \
 round7(a,b,c,x3) \
 x3 -= x2 ^ ((~x1)<<19); \
 round7(b,c,a,x4) \
 x4 ^= x3; \
 round7(c,a,b,x5) \
 x5 += x4; \
 round7(a,b,c,x6) \
 x6 -= x5 ^ ((~x4)>>23); \
 round7(b,c,a,x7) \
 x7 ^= x6; \
 x0 += x7; \
 x1 -= x0 ^ ((~x7)<<19); \
 x2 ^= x1; \
 x3 += x2; \
 x4 -= x3 ^ ((~x2)>>23); \
 x5 ^= x4; \
 x6 += x5; \
 x7 -= x6 ^ XOR_CONST2;
282 |
|
---|
/*
 * pass5: eight round5 steps, one per message word x0..x7, rotating the
 * roles of the three state variables after every round.
 */
#define pass5(a,b,c) \
    pass_start \
    round5(a,b,c,x0) round5(b,c,a,x1) round5(c,a,b,x2) \
    round5(a,b,c,x3) round5(b,c,a,x4) round5(c,a,b,x5) \
    round5(a,b,c,x6) round5(b,c,a,x7) \
    pass_end
294 |
|
---|
/*
 * pass7: eight round7 steps, one per message word x0..x7, rotating the
 * roles of the three state variables after every round.
 */
#define pass7(a,b,c) \
    pass_start \
    round7(a,b,c,x0) round7(b,c,a,x1) round7(c,a,b,x2) \
    round7(a,b,c,x3) round7(b,c,a,x4) round7(c,a,b,x5) \
    round7(a,b,c,x6) round7(b,c,a,x7) \
    pass_end
306 |
|
---|
307 |
|
---|
/*
 * pass9: eight round9 steps, one per message word x0..x7, rotating the
 * roles of the three state variables after every round.
 */
#define pass9(a,b,c) \
    pass_start \
    round9(a,b,c,x0) round9(b,c,a,x1) round9(c,a,b,x2) \
    round9(a,b,c,x3) round9(b,c,a,x4) round9(c,a,b,x5) \
    round9(a,b,c,x6) round9(b,c,a,x7) \
    pass_end
319 |
|
---|
/*
 * feedforward: combine the state after the passes with the values saved
 * by save_abc (XOR for a, subtraction for b, addition for c).
 */
#define feedforward \
    a = a ^ aa; \
    b = b - bb; \
    c = c + cc;
324 |
|
---|
325 |
|
---|
/* This version works ok with C variant and also with new asm version
 * that just wastes a register r8
 * reason? who knows, write forwarding is faster than keeping value
 * in register? :)
 *
 * compress: process one block held in x0..x7 against the state a, b, c.
 * Saves the state, runs pass5n and pass7n (each with the following key
 * schedule folded in) and pass9, then one key_schedule + pass9 per pass
 * beyond the third, and finally applies feedforward. With PASSES == 3 the
 * loop body is never executed.
 * Expects a, b, c, aa, bb, cc, x0..x7, tmpa and pass_no to be declared by
 * the code that expands the macro.
 */
#define compress \
 save_abc \
 pass5n(a,b,c) \
 pass7n(c,a,b) \
 pass9(b,c,a) \
 for(pass_no=3; pass_no<PASSES; pass_no++) { \
 key_schedule \
 pass9(a,b,c) \
 tmpa=a; a=c; c=b; b=tmpa; \
 } \
 feedforward
342 |
|
---|
/*
 * compress_old: same sequence as compress, but built from the plain
 * pass5/pass7 macros with key_schedule expanded separately after each of
 * them instead of the interleaved pass5n/pass7n forms.
 * Not referenced in the visible part of this file.
 */
#define compress_old \
 save_abc \
 pass5(a,b,c) \
 key_schedule \
 pass7(c,a,b) \
 key_schedule \
 pass9(b,c,a) \
 for(pass_no=3; pass_no<PASSES; pass_no++) { \
 key_schedule \
 pass9(a,b,c) \
 tmpa=a; a=c; c=b; b=tmpa; \
 } \
 feedforward
356 |
|
---|
/*
 * tiger_compress_macro(str, state): compress one block.
 *   str    eight word64 message words, read as str[0..7]
 *   state  three word64 chaining values, read and then overwritten
 * Expands to a brace-enclosed block that declares every variable the
 * compress macro relies on (a, b, c, aa, bb, cc, x0..x7, tmpa, pass_no).
 */
#define tiger_compress_macro(str, state) \
{ \
    register word64 a = (state)[0]; \
    register word64 b = (state)[1]; \
    register word64 c = (state)[2]; \
    register word64 tmpa; \
    word64 aa, bb, cc; \
    word64 x0 = (str)[0], x1 = (str)[1], x2 = (str)[2], x3 = (str)[3]; \
    word64 x4 = (str)[4], x5 = (str)[5], x6 = (str)[6], x7 = (str)[7]; \
    int pass_no; \
    \
    compress; \
    \
    (state)[0] = a; \
    (state)[1] = b; \
    (state)[2] = c; \
}
378 |
|
---|
379 | void tiger_compress(const word64 *str, word64 state[3])
|
---|
380 | {
|
---|
381 | tiger_compress_macro(((word64*)str), ((word64*)state));
|
---|
382 | }
|
---|
383 |
|
---|
384 | void tiger_t(const word64 *str, word64 length, word64 res[3])
|
---|
385 | {
|
---|
386 | register word64 i;
|
---|
387 |
|
---|
388 | #ifdef BIG_ENDIAN
|
---|
389 | register word64 j = 0;
|
---|
390 | unsigned char temp[64];
|
---|
391 | #endif
|
---|
392 |
|
---|
393 | /*
|
---|
394 | * res[0]=0x0123456789ABCDEFLL;
|
---|
395 | * res[1]=0xFEDCBA9876543210LL;
|
---|
396 | * res[2]=0xF096A5B4C3B2E187LL;
|
---|
397 | */
|
---|
398 |
|
---|
399 | for(i=length; i>=64; i-=64)
|
---|
400 | {
|
---|
401 | #ifdef BIG_ENDIAN
|
---|
402 | for(j=0; j<64; j++)
|
---|
403 | temp[j^7] = ((sh_byte*)str)[j];
|
---|
404 | tiger_compress(((word64*)temp), res);
|
---|
405 | #else
|
---|
406 | tiger_compress(str, res);
|
---|
407 | #endif
|
---|
408 | str += 8;
|
---|
409 | }
|
---|
410 | }
|
---|
411 |
|
---|
412 | void tiger(const word64 *str, word64 length, word64 res[3])
|
---|
413 | {
|
---|
414 | register word64 i;
|
---|
415 | register word64 j = 0;
|
---|
416 | unsigned char temp[64];
|
---|
417 | union {
|
---|
418 | word64 itmp;
|
---|
419 | unsigned char ctmp[8];
|
---|
420 | } uu;
|
---|
421 |
|
---|
422 | /*
|
---|
423 | * res[0]=0x0123456789ABCDEFLL;
|
---|
424 | * res[1]=0xFEDCBA9876543210LL;
|
---|
425 | * res[2]=0xF096A5B4C3B2E187LL;
|
---|
426 | */
|
---|
427 |
|
---|
428 | for(i=length; i>=64; i-=64)
|
---|
429 | {
|
---|
430 | #ifdef BIG_ENDIAN
|
---|
431 | for(j=0; j<64; j++)
|
---|
432 | temp[j^7] = ((sh_byte*)str)[j];
|
---|
433 | tiger_compress(((word64*)temp), res);
|
---|
434 | #else
|
---|
435 | tiger_compress(str, res);
|
---|
436 | #endif
|
---|
437 | str += 8;
|
---|
438 | }
|
---|
439 |
|
---|
440 | #ifdef BIG_ENDIAN
|
---|
441 | for(j=0; j<i; j++)
|
---|
442 | temp[j^7] = ((sh_byte*)str)[j];
|
---|
443 |
|
---|
444 | temp[j^7] = 0x01;
|
---|
445 | j++;
|
---|
446 | for(; j&7; j++)
|
---|
447 | temp[j^7] = 0;
|
---|
448 | #else
|
---|
449 |
|
---|
450 | #ifndef USE_MEMSET
|
---|
451 | for(j=0; j<i; j++)
|
---|
452 | temp[j] = ((sh_byte*)str)[j];
|
---|
453 | #else
|
---|
454 | memcpy( temp, str, j=i );
|
---|
455 | #endif
|
---|
456 | temp[j++] = 0x01;
|
---|
457 | for(; j&7; j++)
|
---|
458 | temp[j] = 0;
|
---|
459 |
|
---|
460 | #endif
|
---|
461 |
|
---|
462 | if(j>56)
|
---|
463 | {
|
---|
464 | #ifndef USE_MEMSET
|
---|
465 | for(; j<64; j++)
|
---|
466 | temp[j] = 0;
|
---|
467 | #else
|
---|
468 | memset( temp+j, 0, 64-j);
|
---|
469 | #endif
|
---|
470 | tiger_compress(((word64*)temp), res);
|
---|
471 | j=0;
|
---|
472 | }
|
---|
473 |
|
---|
474 | #ifndef USE_MEMSET
|
---|
475 | for(; j<56; j++)
|
---|
476 | temp[j] = 0;
|
---|
477 | #else
|
---|
478 | memset( temp+j, 0, 56-j);
|
---|
479 | #endif
|
---|
480 |
|
---|
481 | /* Avoid gcc warning for type-punned pointer
|
---|
482 | */
|
---|
483 | uu.itmp = ((word64)length)<<3;
|
---|
484 | for (j=0; j<8; j++)
|
---|
485 | temp[56+j] = uu.ctmp[j];
|
---|
486 |
|
---|
487 | tiger_compress(((word64*)temp), res);
|
---|
488 | }
|
---|
489 |
|
---|
490 | #endif
|
---|