/*
 * Source: trunk/src/sh_tiger1_64.c @ 451
 *
 * Last change on this file since 451 was 451, checked in by katerina,
 * 8 years ago:
 *
 *   Fix for ticket #352 (gcc compiler bug).
 *
 * File size: 10.0 KB
 */
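/* Do not include ANY system headers here. The implementation is      */
/* fragile in their presence - probably a macro defined in this file  */
/* (BIG_ENDIAN is a likely candidate) gets overridden by, or clashes  */
/* with, a definition from the system headers. The computed hashes    */
/* will then be incorrect.                                            */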
4
5#include "config_xor.h"
6
7#if defined(TIGER_64_BIT)
8
9#if defined(GCC_VERSION_MAJOR)
10#if ((GCC_VERSION_MAJOR == 4) && (GCC_VERSION_MINOR > 6))
11#pragma GCC optimize ("O1")
12#endif
13#endif
14
15/* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */
16
17#undef USE_MEMSET
18
/* Big endian:
 * BIG_ENDIAN is this file's private switch for the byte-swapping code
 * paths and must mirror WORDS_BIGENDIAN exactly. System headers such as
 * <endian.h> can define a macro of the same name on little-endian hosts
 * as well, so discard any inherited definition before deriving our own.
 */
#undef BIG_ENDIAN
#ifdef WORDS_BIGENDIAN
#define BIG_ENDIAN
#endif
23
24/* Tiger: A Fast New Hash Function
25 *
26 * Ross Anderson and Eli Biham
27 *
28 * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
29 *
30 * Tiger has no usage restrictions nor patents. It can be used freely,
31 * with the reference implementation, with other implementations or with
32 * a modification to the reference implementation (as long as it still
33 * implements Tiger). We only ask you to let us know about your
34 * implementation and to cite the origin of Tiger and of the reference
35 * implementation.
36 *
37 *
38 * The authors' home pages can be found both in
39 * http://www.cs.technion.ac.il/~biham/ and in
40 * http://www.cl.cam.ac.uk/users/rja14/.
41 * The authors' email addresses are biham@cs.technion.ac.il
42 * and rja14@cl.cam.ac.uk.
43 */ 
44
45#if defined(HAVE_LONG_64)
46typedef unsigned long int word64;
47#elif defined(HAVE_LONG_LONG_64)
48typedef unsigned long long int word64;
49#else
50#error No 64 bit type found !
51#endif
52
53#if defined(HAVE_INT_32)
54typedef unsigned int sh_word32;
55#elif defined(HAVE_LONG_32)
56typedef unsigned long sh_word32;
57#elif defined(HAVE_SHORT_32)
58typedef unsigned short sh_word32;
59#else
60#error No 32 bit type found !
61#endif
62
63typedef unsigned char sh_byte;
64
/* Pick the round implementation: inline assembly when TIGER_OPT_ASM is
 * set, portable C otherwise.
 */
#ifdef TIGER_OPT_ASM
#define TIGER_ASM64_2 1
#else
#define TIGER_C 1
#endif

/* Number of passes of the hash function.
 *   - three passes are recommended,
 *   - use four passes when you need extra security,
 *   - the value must be at least three.
 */
#define PASSES 3
76
77extern word64 tiger_table[4*256];
78
79/* Volatile can help if compiler is smart enough to use memory operand */
80static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
81static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
82
83#define t1 (tiger_table)
84#define t2 (tiger_table+256)
85#define t3 (tiger_table+256*2)
86#define t4 (tiger_table+256*3)
87
88#define pass_start
89#define pass_end
90
91
92
93#define save_abc \
94          aa = a; \
95          bb = b; \
96          cc = c;
97
#if defined(TIGER_C)

/* Byte number n of x, where n = 0 is the least significant byte. */
#define BN(x,n) (((x) >> (8*(n))) & 0xFF)

/* Core of one round: fold message word x into c, then update a and b
 * with S-box lookups indexed by the even and the odd bytes of c.
 * The arguments must be plain variables; they are modified in place.
 */
#define roundX(a,b,c,x) \
        c ^= x; \
        a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ t3[BN(c,4)] ^ t4[BN(c,6)]; \
        b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ t2[BN(c,5)] ^ t1[BN(c,7)];

/* Complete rounds: the core followed by multiplying b by 5, 7 or 9
 * (modulo 2^64, since b is an unsigned 64-bit word).
 */
#define round5(a,b,c,x) roundX(a,b,c,x) b *= 5;
#define round7(a,b,c,x) roundX(a,b,c,x) b *= 7;
#define round9(a,b,c,x) roundX(a,b,c,x) b *= 9;

#endif
116
117
118#ifdef TIGER_OPT_ASM
119
/* Byte-selection masks handed to the inline assembly as operands.
 * MASKn has 0xFF in bits n..n+7, i.e. it isolates byte n/8 of a word.
 * (There is no MASK24 and no MASK56: the template extracts those bytes
 * with shifts instead.)
 */
120#define MASK0           0xFFL
121#define MASK8           0xFF00L
122#define MASK16          0xFF0000L
123#define MASK32          0xFF00000000LL
124#define MASK40          0xFF0000000000LL
125#define MASK48          0xFF000000000000LL
126
/* Opens an __asm__ statement. The instruction template strings follow,
 * and roundend() supplies the operand lists and closes the statement.
 */
127#define roundstart      __asm__ (
128
129/* a will be moved into different reg each round
130 * using register substitution feature of  GCC asm
131 * b will be moved in 2-nd pass rounds only
132 */
133
134
/* Operand lists and clobbers for the statement opened by roundstart.
 * With GCC's numbering (outputs first, then inputs) the template sees:
 *   %0, %1, %2   a, b, c        read-write registers
 *   %3, %4, %5   a, b, c        again, as plain register inputs
 *   %6           x              memory operand
 *   %7           &tiger_table   register
 *   %8..%10      MASK0, MASK8, MASK16     immediates
 *   %11..%13     MASK32, MASK40, MASK48   registers
 * NOTE(review): the clobber entry "3" is not a register name, and the
 * flags ("cc") are not listed although the template uses and/xor/shift
 * instructions - confirm against the GCC Extended Asm documentation.
 */
135#define roundend(a,b,c,x) \
136 : "+r" (a), "+r" (b), "+r" (c) \
137 : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
138  "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
139 : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8"  );
140
141
/* Assembly template for the core of one round. The macro arguments are
 * not used in the expansion; operands are bound by roundend(). The C
 * equivalent of the template is:
 */
142/*      c ^= x;
143        a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
144        t3[BN(c,4)] ^ t4[BN(c,6)] ;
145        b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
146        t2[BN(c,5)] ^ t1[BN(c,7)] ;     */
147
/* How the template works:
 *   - x (%6) is loaded into r8 and xored into c (%2).
 *   - Bytes 2, 4, 5 and 6 of c are isolated with MASK16/32/40/48 and
 *     shifted right by (bit position - 3), which leaves byte*8, i.e. a
 *     ready-made byte offset into a table of 8-byte entries.
 *   - Bytes 0, 1, 3 and 7 are extracted with movzbl / movzwl / shifts
 *     and used as indices with a scale factor of 8.
 *   - 2048*k (= 256 entries * 8 bytes * k) selects table t(k+1)
 *     relative to the table base address in %7.
 *   - rsi collects t1[byte0]^t2[byte2]^t3[byte4]^t4[byte6] and is
 *     subtracted from a (%0); rdx collects
 *     t4[byte1]^t3[byte3]^t2[byte5]^t1[byte7] and is added to b (%1).
 * NOTE(review): "%2b" and "%2w" append a size suffix to whatever
 * register name operand 2 prints as, instead of using the %b2 / %w2
 * operand modifiers; that presumably only assembles when c is
 * allocated to one of r8..r15 - confirm.
 */
148#define roundX(a,b,c,x)   \
149"       movl    %10, %%ebx      \n"\
150"       movq    %11, %%rcx      \n"\
151"       movq    %13, %%rdx      \n"\
152"       movq    %6, %%r8  \n"\
153"       xorq    %%r8, %2                 \n" \
154"       andq    %2, %%rbx  \n"\
155"       andq    %2, %%rcx  \n"\
156"       andq    %2, %%rdx  \n"\
157"       shrl    $(16-3), %%ebx  \n"\
158"       shrq    $(32-3), %%rcx  \n"\
159"       shrq    $(48-3), %%rdx  \n"\
160"       movzbl  %2b, %%eax      \n"\
161"       movzwl  %2w, %%edi      \n"\
162"       movq    (%7,%%rax,8), %%rsi  \n"\
163"       shrl    $(8), %%edi  \n" \
164"       movq    %2, %%rax  \n" \
165"       xorq    (2048*1)(%7,%%rbx), %%rsi  \n"\
166"       movq    %2, %%rbx  \n"\
167"       shrl    $24, %%eax \n"\
168"       andq    %12, %%rbx      \n"\
169"       xorq    (2048*2)(%7,%%rcx), %%rsi  \n"\
170"       shrq    $(40-3), %%rbx \n"\
171"       movq    %2, %%rcx  \n"\
172"       xorq    (2048*3)(%7,%%rdx), %%rsi  \n"\
173"       movq    (2048*3)(%7,%%rdi,8), %%rdx  \n"\
174"       shrq    $56, %%rcx \n"\
175"       xorq    (2048*2)(%7,%%rax,8), %%rdx  \n"\
176"       xorq    (2048*1)(%7,%%rbx), %%rdx  \n" \
177"       subq     %%rsi, %0 \n"\
178"       xorq    (%7,%%rcx,8), %%rdx  \n"\
179"       addq     %%rdx, %1 \n"
180
/* Complete round with b multiplied by 5: the core template followed by
 * one leaq computing b + 4*b, all inside a single asm statement.
 */
181#define round5(a,b,c,x) \
182        roundstart \
183        roundX(a,b,c,x) \
184        /* b*=5; */ \
185        "leaq   (%1,%1,4), %1\n" \
186        roundend(a,b,c,x)
187
188
/* Complete round with b multiplied by 7. The core runs in its own asm
 * statement; a second statement then computes 9*b into the output
 * register, doubles the input register and subtracts it (9b - 2b = 7b).
 * NOTE(review): "addq %1, %1" writes to operand 1, which is declared as
 * an input-only "r" operand, and the clobber entry "1" is not a
 * register name - confirm that this is safe with the compilers in use.
 */
189#define round7(a,b,c,x) \
190        roundstart \
191        roundX(a,b,c,x) \
192        roundend(a,b,c,x) \
193        /* b*=7; */ \
194        __asm__ ( \
195        "leaq   (%1,%1,8), %0\n" \
196        "addq  %1, %1 \n" \
197        "subq  %1, %0 " \
198        :"=&r" (b): "r"(b): "1" );
199
/* Complete round with b multiplied by 9: the core template followed by
 * one leaq computing b + 8*b, all inside a single asm statement.
 */
200#define round9(a,b,c,x) \
201        roundstart \
202        roundX(a,b,c,x) \
203        "leaq   (%1,%1,8), %1\n" \
204        roundend(a,b,c,x)
205
206#endif
207
208
209
210
211/* ============== Common macros ================== */
212
/* Key schedule: transforms the eight message words x0..x7 in place.
 * It consists of two sweeps over x0..x7; every statement depends on the
 * one before it, so the order must not be changed.
 */

/* First sweep, starting from XOR_CONST1. */
#define key_schedule_first \
        x0 -= x7 ^ XOR_CONST1; \
        x1 ^= x0; \
        x2 += x1; \
        x3 -= x2 ^ ((~x1) << 19); \
        x4 ^= x3; \
        x5 += x4; \
        x6 -= x5 ^ ((~x4) >> 23); \
        x7 ^= x6;

/* Second sweep, finishing with XOR_CONST2. */
#define key_schedule_second \
        x0 += x7; \
        x1 -= x0 ^ ((~x7) << 19); \
        x2 ^= x1; \
        x3 += x2; \
        x4 -= x3 ^ ((~x2) >> 23); \
        x5 ^= x4; \
        x6 += x5; \
        x7 -= x6 ^ XOR_CONST2;

#define key_schedule \
        key_schedule_first \
        key_schedule_second
230
/* One pass of eight rounds with the key schedule for the NEXT pass
 * woven in: as soon as a round has consumed message word xN, the first
 * sweep of the key schedule updates that word. The second sweep runs
 * after the last round. `rnd` names the round macro to apply.
 */
#define pass_scheduled(rnd,a,b,c) \
          rnd(a,b,c,x0) \
        x0 -= x7 ^ XOR_CONST1; \
          rnd(b,c,a,x1) \
        x1 ^= x0; \
          rnd(c,a,b,x2) \
        x2 += x1; \
          rnd(a,b,c,x3) \
        x3 -= x2 ^ ((~x1) << 19); \
          rnd(b,c,a,x4) \
        x4 ^= x3; \
          rnd(c,a,b,x5) \
        x5 += x4; \
          rnd(a,b,c,x6) \
        x6 -= x5 ^ ((~x4) >> 23); \
          rnd(b,c,a,x7) \
        x7 ^= x6; \
        x0 += x7; \
        x1 -= x0 ^ ((~x7) << 19); \
        x2 ^= x1; \
        x3 += x2; \
        x4 -= x3 ^ ((~x2) >> 23); \
        x5 ^= x4; \
        x6 += x5; \
        x7 -= x6 ^ XOR_CONST2;

/* Pass with multiplier 5, followed by a completed key schedule. */
#define pass5n(a,b,c) pass_scheduled(round5,a,b,c)

/* Pass with multiplier 7, followed by a completed key schedule. */
#define pass7n(a,b,c) pass_scheduled(round7,a,b,c)
282
/* One pass of eight rounds over the message words x0..x7 without any
 * key schedule. The roles of the three state words rotate from round to
 * round. `rnd` names the round macro to apply.
 */
#define pass_plain(rnd,a,b,c) \
        pass_start \
          rnd(a,b,c,x0) \
          rnd(b,c,a,x1) \
          rnd(c,a,b,x2) \
          rnd(a,b,c,x3) \
          rnd(b,c,a,x4) \
          rnd(c,a,b,x5) \
          rnd(a,b,c,x6) \
          rnd(b,c,a,x7) \
        pass_end

/* Passes with multiplier 5, 7 and 9 respectively. */
#define pass5(a,b,c) pass_plain(round5,a,b,c)
#define pass7(a,b,c) pass_plain(round7,a,b,c)
#define pass9(a,b,c) pass_plain(round9,a,b,c)
319
/* Combine the result of the passes with the state saved by save_abc. */
#define feedforward \
          a ^= aa; b -= bb; c += cc;

/* Passes beyond the third (only executed when PASSES > 3): each one
 * runs a key schedule and a multiplier-9 pass, then rotates the roles
 * of the state words.
 */
#define extra_passes \
          for (pass_no = 3; pass_no < PASSES; pass_no++) { \
                key_schedule \
                pass9(a,b,c) \
                tmpa = a; a = c; c = b; b = tmpa; \
          }

/* Compression function body, current version: the key schedule is
 * interleaved with the rounds of the first two passes (pass5n, pass7n).
 * This version works ok with the C variant and also with the new asm
 * version that just wastes a register r8. Reason? Who knows - write
 * forwarding is faster than keeping the value in a register? :)
 */
#define compress \
        save_abc \
          pass5n(a,b,c) \
          pass7n(c,a,b) \
          pass9(b,c,a) \
          extra_passes \
        feedforward

/* Compression function body, older version: each pass is followed by a
 * separate, complete key schedule.
 */
#define compress_old \
        save_abc \
          pass5(a,b,c) \
          key_schedule \
          pass7(c,a,b) \
          key_schedule \
          pass9(b,c,a) \
          extra_passes \
        feedforward
356
/* Compress one block: load the three state words from `state` and the
 * eight message words from `str`, run `compress`, and store the new
 * state back into `state`.
 * Both arguments must be expressions that can be indexed as arrays of
 * word64. The expansion is a brace-enclosed block that declares every
 * variable the compress macros refer to.
 */
#define tiger_compress_macro(str, state) \
{ \
  register word64 a = (state)[0]; \
  register word64 b = (state)[1]; \
  register word64 c = (state)[2]; \
  register word64 tmpa; \
  word64 aa, bb, cc; \
  word64 x0 = (str)[0], x1 = (str)[1], x2 = (str)[2], x3 = (str)[3]; \
  word64 x4 = (str)[4], x5 = (str)[5], x6 = (str)[6], x7 = (str)[7]; \
  int pass_no; \
\
  compress; \
\
  (state)[0] = a; \
  (state)[1] = b; \
  (state)[2] = c; \
}
378
379void tiger_compress(const word64 *str, word64 state[3])
380{
381  tiger_compress_macro(((word64*)str), ((word64*)state));
382}
383
384void tiger_t(const word64 *str, word64 length, word64 res[3])
385{
386  register word64 i;
387
388#ifdef BIG_ENDIAN
389  register word64 j = 0;
390  unsigned char temp[64];
391#endif
392
393  /*
394   * res[0]=0x0123456789ABCDEFLL;
395   * res[1]=0xFEDCBA9876543210LL;
396   * res[2]=0xF096A5B4C3B2E187LL;
397   */
398
399  for(i=length; i>=64; i-=64)
400    {
401#ifdef BIG_ENDIAN
402      for(j=0; j<64; j++)
403        temp[j^7] = ((sh_byte*)str)[j];
404      tiger_compress(((word64*)temp), res);
405#else
406      tiger_compress(str, res);
407#endif
408      str += 8;
409    }
410}
411
412void tiger(const word64 *str, word64 length, word64 res[3])
413{
414  register word64 i;
415  register word64 j = 0;
416  unsigned char temp[64];
417  union {
418    word64 itmp;
419    unsigned char ctmp[8];
420  } uu;
421
422  /*
423   * res[0]=0x0123456789ABCDEFLL;
424   * res[1]=0xFEDCBA9876543210LL;
425   * res[2]=0xF096A5B4C3B2E187LL;
426   */
427
428  for(i=length; i>=64; i-=64)
429    {
430#ifdef BIG_ENDIAN
431      for(j=0; j<64; j++)
432        temp[j^7] = ((sh_byte*)str)[j];
433      tiger_compress(((word64*)temp), res);
434#else
435      tiger_compress(str, res);
436#endif
437      str += 8;
438    }
439
440#ifdef BIG_ENDIAN
441  for(j=0; j<i; j++)
442    temp[j^7] = ((sh_byte*)str)[j];
443
444  temp[j^7] = 0x01;
445  j++;
446  for(; j&7; j++)
447    temp[j^7] = 0;
448#else
449
450#ifndef USE_MEMSET
451  for(j=0; j<i; j++)
452    temp[j] = ((sh_byte*)str)[j];
453#else
454  memcpy( temp, str, j=i );
455#endif
456  temp[j++] = 0x01;
457  for(; j&7; j++)
458        temp[j] = 0;
459
460#endif
461
462  if(j>56)
463    {
464#ifndef USE_MEMSET
465      for(; j<64; j++)
466        temp[j] = 0;
467#else
468      memset( temp+j, 0, 64-j);
469#endif
470      tiger_compress(((word64*)temp), res);
471      j=0;
472    }
473
474#ifndef USE_MEMSET
475  for(; j<56; j++)
476    temp[j] = 0;
477#else
478  memset( temp+j, 0, 56-j);
479#endif
480
481  /* Avoid gcc warning for type-punned pointer
482   */
483  uu.itmp = ((word64)length)<<3;
484  for (j=0; j<8; j++)
485        temp[56+j] = uu.ctmp[j];
486
487  tiger_compress(((word64*)temp), res);
488}
489
490#endif
/* Note: See TracBrowser for help on using the repository browser. */