source: trunk/src/sh_tiger1_64.c@ 456

Last change on this file since 456 was 451, checked in by katerina, 10 years ago

Fix for ticket #352 (gcc compiler bug).

File size: 10.0 KB
Line 
1/* Do not include ANY system headers here: the results will become */
2/* incorrect. A likely cause is that this file defines its own BIG_ENDIAN */
3/* macro, a name that system headers (e.g. <endian.h>) may define as well. */
4
5#include "config_xor.h"
6
7#if defined(TIGER_64_BIT)
8
/* Compile this file at -O1 when built with GCC 4.7 or a later 4.x
 * release (change log: "Fix for ticket #352 (gcc compiler bug)").
 */
#if defined(GCC_VERSION_MAJOR)
#if ((GCC_VERSION_MAJOR == 4) && (GCC_VERSION_MINOR > 6))
#pragma GCC optimize ("O1")
#endif
#endif

/* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */

/* The memcpy()/memset() code paths stay disabled: they would need
 * <string.h>, and this file includes no system header.
 */
#undef USE_MEMSET

/* Big endian:
 * BIG_ENDIAN must reflect WORDS_BIGENDIAN and nothing else. Several
 * system headers (<endian.h>, <sys/param.h>) define a BIG_ENDIAN macro
 * on every platform, little-endian ones included; if such a definition
 * leaked in through an earlier include, the byte-swapping branches below
 * would be selected on little-endian hosts and produce wrong digests.
 * Drop any foreign definition before deciding.
 */
#undef BIG_ENDIAN
#ifdef WORDS_BIGENDIAN
#define BIG_ENDIAN
#endif
23
24/* Tiger: A Fast New Hash Function
25 *
26 * Ross Anderson and Eli Biham
27 *
28 * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
29 *
30 * Tiger has no usage restrictions nor patents. It can be used freely,
31 * with the reference implementation, with other implementations or with
32 * a modification to the reference implementation (as long as it still
33 * implements Tiger). We only ask you to let us know about your
34 * implementation and to cite the origin of Tiger and of the reference
35 * implementation.
36 *
37 *
38 * The authors' home pages can be found both in
39 * http://www.cs.technion.ac.il/~biham/ and in
40 * http://www.cl.cam.ac.uk/users/rja14/.
41 * The authors' email addresses are biham@cs.technion.ac.il
42 * and rja14@cl.cam.ac.uk.
43 */
44
/* word64: unsigned 64-bit word, used for the hash state, the message
 * words and the lookup table. It is chosen from the HAVE_LONG_64 /
 * HAVE_LONG_LONG_64 configuration macros; compilation stops with an
 * error when neither is set.
 */
45#if defined(HAVE_LONG_64)
46typedef unsigned long int word64;
47#elif defined(HAVE_LONG_LONG_64)
48typedef unsigned long long int word64;
49#else
50#error No 64 bit type found !
51#endif
52
/* sh_word32: unsigned 32-bit word, chosen the same way from the
 * HAVE_INT_32 / HAVE_LONG_32 / HAVE_SHORT_32 configuration macros.
 */
53#if defined(HAVE_INT_32)
54typedef unsigned int sh_word32;
55#elif defined(HAVE_LONG_32)
56typedef unsigned long sh_word32;
57#elif defined(HAVE_SHORT_32)
58typedef unsigned short sh_word32;
59#else
60#error No 32 bit type found !
61#endif
62
/* sh_byte: byte type used when the input is copied byte by byte. */
63typedef unsigned char sh_byte;
64
/* Select the round implementation: TIGER_OPT_ASM picks the inline
 * assembler variant, otherwise TIGER_C enables the plain C macros.
 * (The assembler section further down tests TIGER_OPT_ASM directly.)
 */
65#if defined(TIGER_OPT_ASM)
66#define TIGER_ASM64_2 1
67#else
68#define TIGER_C 1
69#endif
70
71/* The number of passes of the hash function. */
72/* Three passes are recommended. */
73/* Use four passes when you need extra security. */
74/* Must be at least three. */
/* The compress macros hard-code the first three passes and loop only */
/* over pass numbers 3 .. PASSES-1. */
75#define PASSES 3
76
/* Lookup table with 4*256 entries, defined in another translation unit.
 * It is addressed below as four consecutive 256-entry sub-tables.
 */
77extern word64 tiger_table[4*256];
78
/* Constants mixed in by the first and the last statement of the key
 * schedule.
 */
79/* Volatile can help if compiler is smart enough to use memory operand */
80static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
81static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
82
/* t1..t4: start of the first, second, third and fourth 256-entry
 * sub-table of tiger_table.
 */
83#define t1 (tiger_table)
84#define t2 (tiger_table+256)
85#define t3 (tiger_table+256*2)
86#define t4 (tiger_table+256*3)
87
/* Hooks expanded at the beginning and end of pass5/pass7/pass9; both
 * expand to nothing.
 */
88#define pass_start
89#define pass_end
90
91
92
/* save_abc: remember the incoming chaining value (a, b, c) in
 * (aa, bb, cc) so that feedforward can combine it with the result of
 * the passes. All six variables are supplied by the expanding scope.
 */
#define save_abc   aa = a;  bb = b;  cc = c;
97
98#ifdef TIGER_C
99
/* BN(x,n): byte number n of x, byte 0 being the least significant. */
#define BN(x,n) (((x) >> (8*(n))) & 0xFF)

/* roundX(a,b,c,x): the part shared by all rounds.
 *   c is XORed with the message word x,
 *   a is decreased by the XOR of four table entries selected by the
 *     even-numbered bytes of c,
 *   b is increased by the XOR of four table entries selected by the
 *     odd-numbered bytes of c.
 * Expands to plain statements (no enclosing block); the pass macros
 * rely on that and place no semicolon after a round.
 */
#define roundX(a,b,c,x) \
  c ^= x; \
  a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ t3[BN(c,4)] ^ t4[BN(c,6)]; \
  b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ t2[BN(c,5)] ^ t1[BN(c,7)];

/* roundN: roundX followed by the multiplication of b by N. */
#define round5(a,b,c,x) roundX(a,b,c,x) b *= 5;
#define round7(a,b,c,x) roundX(a,b,c,x) b *= 7;
#define round9(a,b,c,x) roundX(a,b,c,x) b *= 9;
114
115#endif
116
117
/* Inline assembler (GCC extended asm, x86-64 register names) variant of
 * the round macros. It provides roundX/round5/round7/round9 with the
 * same argument lists as the C variant.
 */
118#ifdef TIGER_OPT_ASM
119
/* Bit masks that isolate single bytes of c: the mask named MASKn covers
 * bits n .. n+7.
 */
120#define MASK0 0xFFL
121#define MASK8 0xFF00L
122#define MASK16 0xFF0000L
123#define MASK32 0xFF00000000LL
124#define MASK40 0xFF0000000000LL
125#define MASK48 0xFF000000000000LL
126
/* roundstart opens an asm statement; the instruction strings follow and
 * roundend supplies the operand lists and the closing parenthesis.
 */
127#define roundstart __asm__ (
128
129/* a will be moved into different reg each round
130 * using register substitution feature of GCC asm
131 * b will be moved in 2-nd pass rounds only
132 */
133
134
/* roundend(a,b,c,x): operand and clobber lists. In order of appearance
 * the operands are
 *   %0 %1 %2    a, b, c (read/write registers)
 *   %3 %4 %5    a, b, c again as inputs (not referenced by the roundX
 *               template)
 *   %6          x, as a memory operand
 *   %7          address of tiger_table
 *   %8 %9 %10   MASK0, MASK8, MASK16 as immediates
 *   %11 %12 %13 MASK32, MASK40, MASK48 in registers
 */
135#define roundend(a,b,c,x) \
136 : "+r" (a), "+r" (b), "+r" (c) \
137 : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
138 "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
139 : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" );
140
141
142/* c ^= x;
143 a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
144 t3[BN(c,4)] ^ t4[BN(c,6)] ; 
145 b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
146 t2[BN(c,5)] ^ t1[BN(c,7)] ; */
147
/* roundX: instruction strings for the computation quoted above.
 * Several bytes of c are masked in place and shifted right by
 * (bit position - 3), which leaves the byte value times 8, i.e. a byte
 * offset into a table of 8-byte entries. The displacement (2048*k)
 * equals 256 entries of 8 bytes and so selects the sub-table that
 * starts k*256 entries into tiger_table. The value for a is collected
 * in %rsi and subtracted from %0; the value for b is collected in %rdx
 * and added to %1.
 */
148#define roundX(a,b,c,x) \
149" movl %10, %%ebx \n"\
150" movq %11, %%rcx \n"\
151" movq %13, %%rdx \n"\
152" movq %6, %%r8 \n"\
153" xorq %%r8, %2 \n" \
154" andq %2, %%rbx \n"\
155" andq %2, %%rcx \n"\
156" andq %2, %%rdx \n"\
157" shrl $(16-3), %%ebx \n"\
158" shrq $(32-3), %%rcx \n"\
159" shrq $(48-3), %%rdx \n"\
160" movzbl %2b, %%eax \n"\
161" movzwl %2w, %%edi \n"\
162" movq (%7,%%rax,8), %%rsi \n"\
163" shrl $(8), %%edi \n" \
164" movq %2, %%rax \n" \
165" xorq (2048*1)(%7,%%rbx), %%rsi \n"\
166" movq %2, %%rbx \n"\
167" shrl $24, %%eax \n"\
168" andq %12, %%rbx \n"\
169" xorq (2048*2)(%7,%%rcx), %%rsi \n"\
170" shrq $(40-3), %%rbx \n"\
171" movq %2, %%rcx \n"\
172" xorq (2048*3)(%7,%%rdx), %%rsi \n"\
173" movq (2048*3)(%7,%%rdi,8), %%rdx \n"\
174" shrq $56, %%rcx \n"\
175" xorq (2048*2)(%7,%%rax,8), %%rdx \n"\
176" xorq (2048*1)(%7,%%rbx), %%rdx \n" \
177" subq %%rsi, %0 \n"\
178" xorq (%7,%%rcx,8), %%rdx \n"\
179" addq %%rdx, %1 \n"
180
/* round5: roundX, then b = b + 4*b, inside the same asm statement. */
181#define round5(a,b,c,x) \
182 roundstart \
183 roundX(a,b,c,x) \
184 /* b*=5; */ \
185 "leaq (%1,%1,4), %1\n" \
186 roundend(a,b,c,x)
187
188
/* round7: roundX in one asm statement, then a second asm statement that
 * computes 9*b - 2*b into a fresh output register for b.
 * NOTE(review): the second statement executes "addq %1, %1" although b
 * is listed there only as an input operand, and it names "1" in the
 * clobber list - confirm this is the intended constraint usage.
 */
189#define round7(a,b,c,x) \
190 roundstart \
191 roundX(a,b,c,x) \
192 roundend(a,b,c,x) \
193 /* b*=7; */ \
194 __asm__ ( \
195 "leaq (%1,%1,8), %0\n" \
196 "addq %1, %1 \n" \
197 "subq %1, %0 " \
198 :"=&r" (b): "r"(b): "1" );
199
/* round9: roundX, then b = b + 8*b, inside the same asm statement. */
200#define round9(a,b,c,x) \
201 roundstart \
202 roundX(a,b,c,x) \
203 "leaq (%1,%1,8), %1\n" \
204 roundend(a,b,c,x)
205
206#endif
207
208
209
210
211/* ============== Common macros ================== */
212
/* key_schedule: rewrite the eight message words x0..x7 in place between
 * two passes. The sixteen statements run strictly in this order; every
 * one of them uses words updated by the statements before it.
 * x0..x7, XOR_CONST1 and XOR_CONST2 come from the expanding scope.
 */
#define key_schedule \
  x0 = x0 - (x7 ^ XOR_CONST1); \
  x1 = x1 ^ x0; \
  x2 = x2 + x1; \
  x3 = x3 - (x2 ^ ((~x1) << 19)); \
  x4 = x4 ^ x3; \
  x5 = x5 + x4; \
  x6 = x6 - (x5 ^ ((~x4) >> 23)); \
  x7 = x7 ^ x6; \
  x0 = x0 + x7; \
  x1 = x1 - (x0 ^ ((~x7) << 19)); \
  x2 = x2 ^ x1; \
  x3 = x3 + x2; \
  x4 = x4 - (x3 ^ ((~x2) >> 23)); \
  x5 = x5 ^ x4; \
  x6 = x6 + x5; \
  x7 = x7 - (x6 ^ XOR_CONST2);
230
/* pass5n(a,b,c): eight round5 steps over x0..x7 with the key schedule
 * folded in. The roles of a, b, c rotate from round to round. Each of
 * the first eight key-schedule statements is placed directly after the
 * round that consumed the word it rewrites; the remaining eight follow
 * the last round. The statements and their order are those of
 * key_schedule, so the message words end up as after a separate
 * key_schedule step.
 */
231#define pass5n(a,b,c) \
232 round5(a,b,c,x0) \
233 x0 -= x7 ^ XOR_CONST1; \
234 round5(b,c,a,x1) \
235 x1 ^= x0; \
236 round5(c,a,b,x2) \
237 x2 += x1; \
238 round5(a,b,c,x3) \
239 x3 -= x2 ^ ((~x1)<<19); \
240 round5(b,c,a,x4) \
241 x4 ^= x3; \
242 round5(c,a,b,x5) \
243 x5 += x4; \
244 round5(a,b,c,x6) \
245 x6 -= x5 ^ ((~x4)>>23); \
246 round5(b,c,a,x7) \
247 x7 ^= x6; \
248 x0 += x7; \
249 x1 -= x0 ^ ((~x7)<<19); \
250 x2 ^= x1; \
251 x3 += x2; \
252 x4 -= x3 ^ ((~x2)>>23); \
253 x5 ^= x4; \
254 x6 += x5; \
255 x7 -= x6 ^ XOR_CONST2;
256
/* pass7n(a,b,c): eight round7 steps over x0..x7 with the key schedule
 * folded in. The layout is that of the round5 variant: a schedule
 * statement follows each round, and the second half of the schedule
 * follows the last round.
 */
257#define pass7n(a,b,c) \
258 round7(a,b,c,x0) \
259 x0 -= x7 ^ XOR_CONST1; \
260 round7(b,c,a,x1) \
261 x1 ^= x0; \
262 round7(c,a,b,x2) \
263 x2 += x1; \
264 round7(a,b,c,x3) \
265 x3 -= x2 ^ ((~x1)<<19); \
266 round7(b,c,a,x4) \
267 x4 ^= x3; \
268 round7(c,a,b,x5) \
269 x5 += x4; \
270 round7(a,b,c,x6) \
271 x6 -= x5 ^ ((~x4)>>23); \
272 round7(b,c,a,x7) \
273 x7 ^= x6; \
274 x0 += x7; \
275 x1 -= x0 ^ ((~x7)<<19); \
276 x2 ^= x1; \
277 x3 += x2; \
278 x4 -= x3 ^ ((~x2)>>23); \
279 x5 ^= x4; \
280 x6 += x5; \
281 x7 -= x6 ^ XOR_CONST2;
282
/* pass5(a,b,c): eight round5 steps over x0..x7 without any key
 * schedule; the roles of a, b, c rotate from round to round.
 */
283#define pass5(a,b,c) \
284 pass_start \
285 round5(a,b,c,x0) \
286 round5(b,c,a,x1) \
287 round5(c,a,b,x2) \
288 round5(a,b,c,x3) \
289 round5(b,c,a,x4) \
290 round5(c,a,b,x5) \
291 round5(a,b,c,x6) \
292 round5(b,c,a,x7) \
293 pass_end
294
/* pass7(a,b,c): eight round7 steps over x0..x7 without any key
 * schedule; the roles of a, b, c rotate from round to round.
 */
295#define pass7(a,b,c) \
296 pass_start \
297 round7(a,b,c,x0) \
298 round7(b,c,a,x1) \
299 round7(c,a,b,x2) \
300 round7(a,b,c,x3) \
301 round7(b,c,a,x4) \
302 round7(c,a,b,x5) \
303 round7(a,b,c,x6) \
304 round7(b,c,a,x7) \
305 pass_end
306
307
/* pass9(a,b,c): eight round9 steps over x0..x7 without any key
 * schedule; the roles of a, b, c rotate from round to round.
 */
308#define pass9(a,b,c) \
309 pass_start \
310 round9(a,b,c,x0) \
311 round9(b,c,a,x1) \
312 round9(c,a,b,x2) \
313 round9(a,b,c,x3) \
314 round9(b,c,a,x4) \
315 round9(c,a,b,x5) \
316 round9(a,b,c,x6) \
317 round9(b,c,a,x7) \
318 pass_end
319
/* feedforward: combine the result of the passes with the chaining value
 * saved by save_abc - XOR for a, subtraction for b, addition for c.
 */
#define feedforward   a = a ^ aa;  b = b - bb;  c = c + cc;
324
325
/* compress: one application of the compression function to the message
 * words x0..x7 and the chaining value a, b, c:
 *   save_abc, pass5n(a,b,c), pass7n(c,a,b), pass9(b,c,a),
 *   then for every pass number from 3 up to PASSES-1: key_schedule,
 *   pass9(a,b,c) and a rotation of (a,b,c) through tmpa,
 *   finally feedforward.
 * With PASSES == 3 the body of the for loop never runs.
 * The expanding scope must declare a, b, c, aa, bb, cc, tmpa, pass_no
 * and x0..x7.
 */
326/* This version works ok with C variant and also with new asm version
327 * that just wastes a register r8
328 * reason? who knows, write forwarding is faster than keeping value
329 * in register? :)
330 */
331#define compress \
332 save_abc \
333 pass5n(a,b,c) \
334 pass7n(c,a,b) \
335 pass9(b,c,a) \
336 for(pass_no=3; pass_no<PASSES; pass_no++) { \
337 key_schedule \
338 pass9(a,b,c) \
339 tmpa=a; a=c; c=b; b=tmpa; \
340 } \
341 feedforward
342
/* compress_old: same sequence of passes as compress, but with the key
 * schedule run as separate key_schedule steps between the passes
 * instead of being interleaved with the rounds. tiger_compress_macro
 * expands compress, not this macro.
 */
343#define compress_old \
344 save_abc \
345 pass5(a,b,c) \
346 key_schedule \
347 pass7(c,a,b) \
348 key_schedule \
349 pass9(b,c,a) \
350 for(pass_no=3; pass_no<PASSES; pass_no++) { \
351 key_schedule \
352 pass9(a,b,c) \
353 tmpa=a; a=c; c=b; b=tmpa; \
354 } \
355 feedforward
356
/* tiger_compress_macro(str, state): a complete block statement that
 * declares all working variables, loads the chaining value from
 * state[0..2] and the eight message words from str[0..7], expands
 * compress and writes the new chaining value back to state[0..2].
 * Both arguments are used with array subscripts only.
 */
357#define tiger_compress_macro(str, state) \
358{ \
359 register word64 a, b, c; \
360 register word64 tmpa; \
361 word64 aa, bb, cc; \
362 word64 x0, x1, x2, x3, x4, x5, x6, x7; \
363 int pass_no; \
364\
365 a = state[0]; \
366 b = state[1]; \
367 c = state[2]; \
368\
369 x0=str[0]; x1=str[1]; x2=str[2]; x3=str[3]; \
370 x4=str[4]; x5=str[5]; x6=str[6]; x7=str[7]; \
371\
372 compress; \
373\
374 state[0] = a; \
375 state[1] = b; \
376 state[2] = c; \
377}
378
379void tiger_compress(const word64 *str, word64 state[3])
380{
381 tiger_compress_macro(((word64*)str), ((word64*)state));
382}
383
384void tiger_t(const word64 *str, word64 length, word64 res[3])
385{
386 register word64 i;
387
388#ifdef BIG_ENDIAN
389 register word64 j = 0;
390 unsigned char temp[64];
391#endif
392
393 /*
394 * res[0]=0x0123456789ABCDEFLL;
395 * res[1]=0xFEDCBA9876543210LL;
396 * res[2]=0xF096A5B4C3B2E187LL;
397 */
398
399 for(i=length; i>=64; i-=64)
400 {
401#ifdef BIG_ENDIAN
402 for(j=0; j<64; j++)
403 temp[j^7] = ((sh_byte*)str)[j];
404 tiger_compress(((word64*)temp), res);
405#else
406 tiger_compress(str, res);
407#endif
408 str += 8;
409 }
410}
411
412void tiger(const word64 *str, word64 length, word64 res[3])
413{
414 register word64 i;
415 register word64 j = 0;
416 unsigned char temp[64];
417 union {
418 word64 itmp;
419 unsigned char ctmp[8];
420 } uu;
421
422 /*
423 * res[0]=0x0123456789ABCDEFLL;
424 * res[1]=0xFEDCBA9876543210LL;
425 * res[2]=0xF096A5B4C3B2E187LL;
426 */
427
428 for(i=length; i>=64; i-=64)
429 {
430#ifdef BIG_ENDIAN
431 for(j=0; j<64; j++)
432 temp[j^7] = ((sh_byte*)str)[j];
433 tiger_compress(((word64*)temp), res);
434#else
435 tiger_compress(str, res);
436#endif
437 str += 8;
438 }
439
440#ifdef BIG_ENDIAN
441 for(j=0; j<i; j++)
442 temp[j^7] = ((sh_byte*)str)[j];
443
444 temp[j^7] = 0x01;
445 j++;
446 for(; j&7; j++)
447 temp[j^7] = 0;
448#else
449
450#ifndef USE_MEMSET
451 for(j=0; j<i; j++)
452 temp[j] = ((sh_byte*)str)[j];
453#else
454 memcpy( temp, str, j=i );
455#endif
456 temp[j++] = 0x01;
457 for(; j&7; j++)
458 temp[j] = 0;
459
460#endif
461
462 if(j>56)
463 {
464#ifndef USE_MEMSET
465 for(; j<64; j++)
466 temp[j] = 0;
467#else
468 memset( temp+j, 0, 64-j);
469#endif
470 tiger_compress(((word64*)temp), res);
471 j=0;
472 }
473
474#ifndef USE_MEMSET
475 for(; j<56; j++)
476 temp[j] = 0;
477#else
478 memset( temp+j, 0, 56-j);
479#endif
480
481 /* Avoid gcc warning for type-punned pointer
482 */
483 uu.itmp = ((word64)length)<<3;
484 for (j=0; j<8; j++)
485 temp[56+j] = uu.ctmp[j];
486
487 tiger_compress(((word64*)temp), res);
488}
489
490#endif
Note: See TracBrowser for help on using the repository browser.