/* Do not include ANY system headers here. The implementation is */
/* somehow flawed - maybe something gets overlaid by definitions */
/* in the system headers. Results will become incorrect. */
|
---|
4 |
|
---|
5 | #include "config_xor.h"
|
---|
6 |
|
---|
7 | #if defined(__clang__)
|
---|
8 | #undef TIGER_OPT_ASM
|
---|
9 | #endif
|
---|
10 |
|
---|
11 | #if defined(TIGER_64_BIT)
|
---|
12 |
|
---|
13 | #if defined(GCC_VERSION_MAJOR) && !defined(__clang__)
|
---|
14 | #if ((GCC_VERSION_MAJOR == 4) && (GCC_VERSION_MINOR > 6))
|
---|
15 | #pragma GCC optimize ("O1")
|
---|
16 | #endif
|
---|
17 | #endif
|
---|
18 |
|
---|
19 |
|
---|
20 | /* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */
|
---|
21 |
|
---|
22 | #undef USE_MEMSET
|
---|
23 |
|
---|
24 | /* Big endian: */
|
---|
25 | #ifdef WORDS_BIGENDIAN
|
---|
26 | #define BIG_ENDIAN
|
---|
27 | #endif
|
---|
28 |
|
---|
29 | /* Tiger: A Fast New Hash Function
|
---|
30 | *
|
---|
31 | * Ross Anderson and Eli Biham
|
---|
32 | *
|
---|
33 | * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
|
---|
34 | *
|
---|
35 | * Tiger has no usage restrictions nor patents. It can be used freely,
|
---|
36 | * with the reference implementation, with other implementations or with
|
---|
37 | * a modification to the reference implementation (as long as it still
|
---|
38 | * implements Tiger). We only ask you to let us know about your
|
---|
39 | * implementation and to cite the origin of Tiger and of the reference
|
---|
40 | * implementation.
|
---|
41 | *
|
---|
42 | *
|
---|
43 | * The authors' home pages can be found both in
|
---|
44 | * http://www.cs.technion.ac.il/~biham/ and in
|
---|
45 | * http://www.cl.cam.ac.uk/users/rja14/.
|
---|
46 | * The authors' email addresses are biham@cs.technion.ac.il
|
---|
47 | * and rja14@cl.cam.ac.uk.
|
---|
48 | */
|
---|
49 |
|
---|
50 | #if defined(HAVE_LONG_64)
|
---|
51 | typedef unsigned long int word64;
|
---|
52 | #elif defined(HAVE_LONG_LONG_64)
|
---|
53 | typedef unsigned long long int word64;
|
---|
54 | #else
|
---|
55 | #error No 64 bit type found !
|
---|
56 | #endif
|
---|
57 |
|
---|
58 | #if defined(HAVE_INT_32)
|
---|
59 | typedef unsigned int sh_word32;
|
---|
60 | #elif defined(HAVE_LONG_32)
|
---|
61 | typedef unsigned long sh_word32;
|
---|
62 | #elif defined(HAVE_SHORT_32)
|
---|
63 | typedef unsigned short sh_word32;
|
---|
64 | #else
|
---|
65 | #error No 32 bit type found !
|
---|
66 | #endif
|
---|
67 |
|
---|
68 | typedef unsigned char sh_byte;
|
---|
69 |
|
---|
/* Select the round implementation: the assembler rounds when
 * TIGER_OPT_ASM is defined, the plain C rounds otherwise.
 */
#if !defined(TIGER_OPT_ASM)
#define TIGER_C 1
#else
#define TIGER_ASM64_2 1
#endif

/*
 * The number of passes of the hash function.
 * Three passes are recommended; use four passes when you need extra
 * security. Must be at least three.
 */
#define PASSES 3
|
---|
81 |
|
---|
82 | extern word64 tiger_table[4*256];
|
---|
83 |
|
---|
84 | /* Volatile can help if compiler is smart enough to use memory operand */
|
---|
85 | static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
|
---|
86 | static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
|
---|
87 |
|
---|
88 | #define t1 (tiger_table)
|
---|
89 | #define t2 (tiger_table+256)
|
---|
90 | #define t3 (tiger_table+256*2)
|
---|
91 | #define t4 (tiger_table+256*3)
|
---|
92 |
|
---|
/* Expanded at the start and end of pass5/pass7/pass9; both are empty. */
#define pass_start
#define pass_end

/* Copy the working state a, b, c into aa, bb, cc. */
#define save_abc  aa = a;  bb = b;  cc = c;
|
---|
102 |
|
---|
103 | #ifdef TIGER_C
|
---|
104 |
|
---|
/* BN(x,n): byte number n of x, where byte 0 is the least significant. */
#define BN(x,n) (((x) >> ((n)*8)) & 0xFF)

/* One round without the final multiplication: x is mixed into c, then
 * the even bytes of c index the tables for the value subtracted from a
 * and the odd bytes index them for the value added to b.
 */
/* Depending on outer code one of these two can be better*/
#define roundX(a,b,c,x) \
  c ^= x; \
  a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ t3[BN(c,4)] ^ t4[BN(c,6)]; \
  b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ t2[BN(c,5)] ^ t1[BN(c,7)];

/* roundX followed by b *= 5, b *= 7 and b *= 9 respectively. */
#define round5(a,b,c,x)  roundX(a,b,c,x)  b = b + b*4;
#define round7(a,b,c,x)  roundX(a,b,c,x)  b = b*8 - b;
#define round9(a,b,c,x)  roundX(a,b,c,x)  b = b + b*8;
|
---|
119 |
|
---|
120 | #endif
|
---|
121 |
|
---|
122 |
|
---|
#ifdef TIGER_OPT_ASM

/* Masks selecting byte 0, 1, 2, 4, 5 and 6 of a 64-bit word. */
#define MASK0 0xFFL
#define MASK8 0xFF00L
#define MASK16 0xFF0000L
#define MASK32 0xFF00000000LL
#define MASK40 0xFF0000000000LL
#define MASK48 0xFF000000000000LL

/* Opens the asm statement that roundend() closes. */
#define roundstart __asm__ (

/* a will be moved into different reg each round
 * using register substitution feature of GCC asm
 * b will be moved in 2-nd pass rounds only
 */

/* Operand list shared by all rounds. Numbering used by the template:
 *   %0 %1 %2    a, b, c (read/write registers)
 *   %3 %4 %5    a, b, c again as inputs (not referenced by the template)
 *   %6          x (memory operand)
 *   %7          address of tiger_table
 *   %8 %9 %10   MASK0, MASK8, MASK16 (immediates; only %10 is referenced)
 *   %11 %12 %13 MASK32, MASK40, MASK48 (registers)
 */
#define roundend(a,b,c,x) \
  : "+r" (a), "+r" (b), "+r" (c) \
  : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
    "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
  : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" );

/* Assembler form of
 *
 *   c ^= x;
 *   a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
 *        t3[BN(c,4)] ^ t4[BN(c,6)] ;
 *   b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
 *        t2[BN(c,5)] ^ t1[BN(c,7)] ;
 *
 * Bytes 2, 4, 5 and 6 of c are masked and shifted right by (bit
 * position - 3), which yields byte*8, i.e. a byte offset into a table
 * of 8-byte entries; 2048 (= 256*8) is the size of one table section.
 * %rsi collects the value subtracted from a, %rdx the value added to b.
 *
 * The low byte / low word of c are named with the operand modifiers
 * %b2 / %w2. (Writing "%2b" / "%2w" would print the 64-bit register
 * name followed by a literal letter, which is a valid register name
 * only for r8..r15.)
 */
#define roundX(a,b,c,x) \
" movl %10, %%ebx \n"\
" movq %11, %%rcx \n"\
" movq %13, %%rdx \n"\
" movq %6, %%r8 \n"\
" xorq %%r8, %2 \n" \
" andq %2, %%rbx \n"\
" andq %2, %%rcx \n"\
" andq %2, %%rdx \n"\
" shrl $(16-3), %%ebx \n"\
" shrq $(32-3), %%rcx \n"\
" shrq $(48-3), %%rdx \n"\
" movzbl %b2, %%eax \n"\
" movzwl %w2, %%edi \n"\
" movq (%7,%%rax,8), %%rsi \n"\
" shrl $(8), %%edi \n" \
" movq %2, %%rax \n" \
" xorq (2048*1)(%7,%%rbx), %%rsi \n"\
" movq %2, %%rbx \n"\
" shrl $24, %%eax \n"\
" andq %12, %%rbx \n"\
" xorq (2048*2)(%7,%%rcx), %%rsi \n"\
" shrq $(40-3), %%rbx \n"\
" movq %2, %%rcx \n"\
" xorq (2048*3)(%7,%%rdx), %%rsi \n"\
" movq (2048*3)(%7,%%rdi,8), %%rdx \n"\
" shrq $56, %%rcx \n"\
" xorq (2048*2)(%7,%%rax,8), %%rdx \n"\
" xorq (2048*1)(%7,%%rbx), %%rdx \n" \
" subq %%rsi, %0 \n"\
" xorq (%7,%%rcx,8), %%rdx \n"\
" addq %%rdx, %1 \n"

/* Round followed by b *= 5 (b + 4*b in a single leaq). */
#define round5(a,b,c,x) \
  roundstart \
  roundX(a,b,c,x) \
  /* b*=5; */ \
  "leaq (%1,%1,4), %1\n" \
  roundend(a,b,c,x)

/* Round followed by b *= 7 in a second asm statement. The product is
 * formed as 8*b - b in the output register, so the input-only operand
 * %1 is never written (GCC requires input operands to be unchanged on
 * exit from the asm).
 */
#define round7(a,b,c,x) \
  roundstart \
  roundX(a,b,c,x) \
  roundend(a,b,c,x) \
  /* b*=7; */ \
  __asm__ ( \
    "leaq (,%1,8), %0\n" \
    "subq %1, %0 " \
    : "=&r" (b) : "r" (b) );

/* Round followed by b *= 9 (b + 8*b in a single leaq). */
#define round9(a,b,c,x) \
  roundstart \
  roundX(a,b,c,x) \
  /* b*=9; */ \
  "leaq (%1,%1,8), %1\n" \
  roundend(a,b,c,x)

#endif
|
---|
212 |
|
---|
213 |
|
---|
214 |
|
---|
215 |
|
---|
216 | /* ============== Common macros ================== */
|
---|
217 |
|
---|
/* Key schedule: transforms the eight message words x0..x7 in place.
 * Each statement uses the word updated by the previous one, so the
 * order of the statements matters.
 */
#define key_schedule \
  x0 -= x7 ^ XOR_CONST1;     x1 ^= x0;   x2 += x1; \
  x3 -= x2 ^ ((~x1)<<19);    x4 ^= x3;   x5 += x4; \
  x6 -= x5 ^ ((~x4)>>23);    x7 ^= x6; \
  x0 += x7; \
  x1 -= x0 ^ ((~x7)<<19);    x2 ^= x1;   x3 += x2; \
  x4 -= x3 ^ ((~x2)>>23);    x5 ^= x4;   x6 += x5; \
  x7 -= x6 ^ XOR_CONST2;
|
---|
235 |
|
---|
/* Eight round5 rounds over x0..x7 with the key schedule folded in:
 * the schedule statement that first modifies xi runs directly after
 * the round that consumed xi; the remaining schedule statements follow
 * the eighth round.
 */
#define pass5n(a,b,c) \
  round5(a,b,c,x0)  x0 -= x7 ^ XOR_CONST1; \
  round5(b,c,a,x1)  x1 ^= x0; \
  round5(c,a,b,x2)  x2 += x1; \
  round5(a,b,c,x3)  x3 -= x2 ^ ((~x1)<<19); \
  round5(b,c,a,x4)  x4 ^= x3; \
  round5(c,a,b,x5)  x5 += x4; \
  round5(a,b,c,x6)  x6 -= x5 ^ ((~x4)>>23); \
  round5(b,c,a,x7)  x7 ^= x6; \
  x0 += x7; \
  x1 -= x0 ^ ((~x7)<<19); \
  x2 ^= x1; \
  x3 += x2; \
  x4 -= x3 ^ ((~x2)>>23); \
  x5 ^= x4; \
  x6 += x5; \
  x7 -= x6 ^ XOR_CONST2;
|
---|
261 |
|
---|
/* Eight round7 rounds over x0..x7 with the key schedule folded in:
 * the schedule statement that first modifies xi runs directly after
 * the round that consumed xi; the remaining schedule statements follow
 * the eighth round.
 */
#define pass7n(a,b,c) \
  round7(a,b,c,x0)  x0 -= x7 ^ XOR_CONST1; \
  round7(b,c,a,x1)  x1 ^= x0; \
  round7(c,a,b,x2)  x2 += x1; \
  round7(a,b,c,x3)  x3 -= x2 ^ ((~x1)<<19); \
  round7(b,c,a,x4)  x4 ^= x3; \
  round7(c,a,b,x5)  x5 += x4; \
  round7(a,b,c,x6)  x6 -= x5 ^ ((~x4)>>23); \
  round7(b,c,a,x7)  x7 ^= x6; \
  x0 += x7; \
  x1 -= x0 ^ ((~x7)<<19); \
  x2 ^= x1; \
  x3 += x2; \
  x4 -= x3 ^ ((~x2)>>23); \
  x5 ^= x4; \
  x6 += x5; \
  x7 -= x6 ^ XOR_CONST2;
|
---|
287 |
|
---|
/* Eight round5 rounds over x0..x7; the roles of a, b and c rotate
 * from one round to the next. No key schedule is applied here.
 */
#define pass5(a,b,c) \
  pass_start \
  round5(a,b,c,x0)  round5(b,c,a,x1)  round5(c,a,b,x2) \
  round5(a,b,c,x3)  round5(b,c,a,x4)  round5(c,a,b,x5) \
  round5(a,b,c,x6)  round5(b,c,a,x7) \
  pass_end
|
---|
299 |
|
---|
/* Eight round7 rounds over x0..x7; the roles of a, b and c rotate
 * from one round to the next. No key schedule is applied here.
 */
#define pass7(a,b,c) \
  pass_start \
  round7(a,b,c,x0)  round7(b,c,a,x1)  round7(c,a,b,x2) \
  round7(a,b,c,x3)  round7(b,c,a,x4)  round7(c,a,b,x5) \
  round7(a,b,c,x6)  round7(b,c,a,x7) \
  pass_end
|
---|
311 |
|
---|
312 |
|
---|
/* Eight round9 rounds over x0..x7; the roles of a, b and c rotate
 * from one round to the next. No key schedule is applied here.
 */
#define pass9(a,b,c) \
  pass_start \
  round9(a,b,c,x0)  round9(b,c,a,x1)  round9(c,a,b,x2) \
  round9(a,b,c,x3)  round9(b,c,a,x4)  round9(c,a,b,x5) \
  round9(a,b,c,x6)  round9(b,c,a,x7) \
  pass_end
|
---|
324 |
|
---|
/* Combine the new state with the copies taken by save_abc:
 * a by XOR, b by subtraction, c by addition.
 */
#define feedforward  a ^= aa;  b -= bb;  c += cc;
|
---|
329 |
|
---|
330 |
|
---|
/* Compression of one block, using the passes that carry the key
 * schedule inside them (pass5n, pass7n) followed by a plain pass9.
 * Any pass beyond the third (PASSES > 3) runs key_schedule, pass9 and
 * then rotates a, b, c.
 *
 * Original author's note: this version works ok with the C variant and
 * also with the new asm version that just wastes a register r8.
 * Reason? Who knows - write forwarding is faster than keeping the value
 * in a register? :)
 */
#define compress \
  save_abc \
  pass5n(a,b,c) \
  pass7n(c,a,b) \
  pass9(b,c,a) \
  for (pass_no = 3; pass_no < PASSES; pass_no++) { \
    key_schedule \
    pass9(a,b,c) \
    tmpa = a;  a = c;  c = b;  b = tmpa; \
  } \
  feedforward
|
---|
347 |
|
---|
/* Compression of one block with the key schedule run as a separate
 * step between the passes (pass5, key_schedule, pass7, key_schedule,
 * pass9). Any pass beyond the third (PASSES > 3) runs key_schedule,
 * pass9 and then rotates a, b, c. tiger_compress_macro expands
 * `compress`, not this macro.
 */
#define compress_old \
  save_abc \
  pass5(a,b,c)   key_schedule \
  pass7(c,a,b)   key_schedule \
  pass9(b,c,a) \
  for (pass_no = 3; pass_no < PASSES; pass_no++) { \
    key_schedule \
    pass9(a,b,c) \
    tmpa = a;  a = c;  c = b;  b = tmpa; \
  } \
  feedforward
|
---|
361 |
|
---|
/* Body of the compression function as a brace-enclosed block.
 *   str   - expression indexable as str[0..7]: the eight message words
 *   state - expression indexable as state[0..2]: read on entry and
 *           overwritten with the new state on exit
 * All working variables used by `compress` are declared locally.
 */
#define tiger_compress_macro(str, state) \
{ \
  register word64 a, b, c; \
  register word64 tmpa; \
  word64 aa, bb, cc; \
  word64 x0, x1, x2, x3, x4, x5, x6, x7; \
  int pass_no; \
  \
  /* load the current state */ \
  a = state[0];  b = state[1];  c = state[2]; \
  \
  /* load the message block */ \
  x0 = str[0];  x1 = str[1];  x2 = str[2];  x3 = str[3]; \
  x4 = str[4];  x5 = str[5];  x6 = str[6];  x7 = str[7]; \
  \
  compress; \
  \
  /* store the new state */ \
  state[0] = a;  state[1] = b;  state[2] = c; \
}
|
---|
383 |
|
---|
384 | void tiger_compress(const word64 *str, word64 state[3])
|
---|
385 | {
|
---|
386 | tiger_compress_macro(((const word64*)str), ((word64*)state));
|
---|
387 | }
|
---|
388 |
|
---|
389 | void tiger_t(const word64 *str, word64 length, word64 res[3])
|
---|
390 | {
|
---|
391 | register word64 i;
|
---|
392 |
|
---|
393 | #ifdef BIG_ENDIAN
|
---|
394 | register word64 j = 0;
|
---|
395 | unsigned char temp[64];
|
---|
396 | #endif
|
---|
397 |
|
---|
398 | /*
|
---|
399 | * res[0]=0x0123456789ABCDEFLL;
|
---|
400 | * res[1]=0xFEDCBA9876543210LL;
|
---|
401 | * res[2]=0xF096A5B4C3B2E187LL;
|
---|
402 | */
|
---|
403 |
|
---|
404 | for(i=length; i>=64; i-=64)
|
---|
405 | {
|
---|
406 | #ifdef BIG_ENDIAN
|
---|
407 | for(j=0; j<64; j++)
|
---|
408 | temp[j^7] = ((sh_byte*)str)[j];
|
---|
409 | tiger_compress(((word64*)temp), res);
|
---|
410 | #else
|
---|
411 | tiger_compress(str, res);
|
---|
412 | #endif
|
---|
413 | str += 8;
|
---|
414 | }
|
---|
415 | }
|
---|
416 |
|
---|
417 | void tiger(const word64 *str, word64 length, word64 res[3])
|
---|
418 | {
|
---|
419 | register word64 i;
|
---|
420 | register word64 j = 0;
|
---|
421 | union {
|
---|
422 | word64 w64_temp[8];
|
---|
423 | unsigned char temp[64];
|
---|
424 | } dd;
|
---|
425 | union {
|
---|
426 | word64 itmp;
|
---|
427 | unsigned char ctmp[8];
|
---|
428 | } uu;
|
---|
429 |
|
---|
430 | /*
|
---|
431 | * res[0]=0x0123456789ABCDEFLL;
|
---|
432 | * res[1]=0xFEDCBA9876543210LL;
|
---|
433 | * res[2]=0xF096A5B4C3B2E187LL;
|
---|
434 | */
|
---|
435 |
|
---|
436 | for(i=length; i>=64; i-=64)
|
---|
437 | {
|
---|
438 | #ifdef BIG_ENDIAN
|
---|
439 | for(j=0; j<64; j++)
|
---|
440 | dd.temp[j^7] = ((sh_byte*)str)[j];
|
---|
441 | tiger_compress((dd.w64_temp), res);
|
---|
442 | #else
|
---|
443 | tiger_compress(str, res);
|
---|
444 | #endif
|
---|
445 | str += 8;
|
---|
446 | }
|
---|
447 |
|
---|
448 | #ifdef BIG_ENDIAN
|
---|
449 | for(j=0; j<i; j++)
|
---|
450 | dd.temp[j^7] = ((sh_byte*)str)[j];
|
---|
451 |
|
---|
452 | dd.temp[j^7] = 0x01;
|
---|
453 | j++;
|
---|
454 | for(; j&7; j++)
|
---|
455 | dd.temp[j^7] = 0;
|
---|
456 | #else
|
---|
457 |
|
---|
458 | #ifndef USE_MEMSET
|
---|
459 | for(j=0; j<i; j++)
|
---|
460 | dd.temp[j] = ((const sh_byte*)str)[j];
|
---|
461 | #else
|
---|
462 | memcpy( dd.temp, str, j=i );
|
---|
463 | #endif
|
---|
464 | dd.temp[j++] = 0x01;
|
---|
465 | for(; j&7; j++)
|
---|
466 | dd.temp[j] = 0;
|
---|
467 |
|
---|
468 | #endif
|
---|
469 |
|
---|
470 | if(j>56)
|
---|
471 | {
|
---|
472 | #ifndef USE_MEMSET
|
---|
473 | for(; j<64; j++)
|
---|
474 | dd.temp[j] = 0;
|
---|
475 | #else
|
---|
476 | memset( (dd.temp)+j, 0, 64-j);
|
---|
477 | #endif
|
---|
478 | tiger_compress((dd.w64_temp), res);
|
---|
479 | j=0;
|
---|
480 | }
|
---|
481 |
|
---|
482 | #ifndef USE_MEMSET
|
---|
483 | for(; j<56; j++)
|
---|
484 | dd.temp[j] = 0;
|
---|
485 | #else
|
---|
486 | memset( (dd.temp)+j, 0, 56-j);
|
---|
487 | #endif
|
---|
488 |
|
---|
489 | /* Avoid gcc warning for type-punned pointer
|
---|
490 | */
|
---|
491 | uu.itmp = ((word64)length)<<3;
|
---|
492 | for (j=0; j<8; j++)
|
---|
493 | dd.temp[56+j] = uu.ctmp[j];
|
---|
494 |
|
---|
495 | tiger_compress((dd.w64_temp), res);
|
---|
496 | }
|
---|
497 |
|
---|
498 | #endif
|
---|