source: trunk/src/sh_tiger1_64.c @ 481

Last change on this file since 481 was 481, checked in by katerina, 6 years ago

Enhancements and fixes for tickets #374, #375, #376, #377, #378, and #379.

File size: 10.2 KB
RevLine 
[1]1/* Do not include ANY system headers here. The implementation is    */
2/* somehow flawed - maybe something gets overlaid by definitions    */
3/* in the system headers. Results will become incorrect.            */
4
5#include "config_xor.h"
6
[474]7#if defined(__clang__)
8#undef TIGER_OPT_ASM
9#endif
10
[18]11#if defined(TIGER_64_BIT)
[1]12
[474]13#if defined(GCC_VERSION_MAJOR) && !defined(__clang__)
[451]14#if ((GCC_VERSION_MAJOR == 4) && (GCC_VERSION_MINOR > 6))
15#pragma GCC optimize ("O1")
16#endif
17#endif
18
[474]19
[18]20/* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */
21
22#undef USE_MEMSET
23
[1]24/* Big endian:                                         */
25#ifdef WORDS_BIGENDIAN
26#define BIG_ENDIAN
27#endif
28
29/* Tiger: A Fast New Hash Function
30 *
31 * Ross Anderson and Eli Biham
32 *
33 * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
34 *
35 * Tiger has no usage restrictions nor patents. It can be used freely,
36 * with the reference implementation, with other implementations or with
37 * a modification to the reference implementation (as long as it still
38 * implements Tiger). We only ask you to let us know about your
39 * implementation and to cite the origin of Tiger and of the reference
40 * implementation.
41 *
42 *
43 * The authors' home pages can be found both in
44 * http://www.cs.technion.ac.il/~biham/ and in
45 * http://www.cl.cam.ac.uk/users/rja14/.
46 * The authors' email addresses are biham@cs.technion.ac.il
47 * and rja14@cl.cam.ac.uk.
48 */ 
49
50#if defined(HAVE_LONG_64)
51typedef unsigned long int word64;
[18]52#elif defined(HAVE_LONG_LONG_64)
53typedef unsigned long long int word64;
[1]54#else
[18]55#error No 64 bit type found !
[1]56#endif
57
58#if defined(HAVE_INT_32)
59typedef unsigned int sh_word32;
60#elif defined(HAVE_LONG_32)
61typedef unsigned long sh_word32;
62#elif defined(HAVE_SHORT_32)
63typedef unsigned short sh_word32;
64#else
[18]65#error No 32 bit type found !
[1]66#endif
67
68typedef unsigned char sh_byte;
69
[18]70#if defined(TIGER_OPT_ASM)
71#define TIGER_ASM64_2 1
72#else
73#define TIGER_C 1
[1]74#endif
75
[18]76/* The number of passes of the hash function.              */
77/* Three passes are recommended.                           */
78/* Use four passes when you need extra security.           */
79/* Must be at least three.                                 */
[1]80#define PASSES 3
81
82extern word64 tiger_table[4*256];
83
[18]84/* Volatile can help if compiler is smart enough to use memory operand */
85static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
86static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
87
[1]88#define t1 (tiger_table)
89#define t2 (tiger_table+256)
90#define t3 (tiger_table+256*2)
91#define t4 (tiger_table+256*3)
92
[18]93#define pass_start
94#define pass_end
95
96
97
[1]98#define save_abc \
[18]99          aa = a; \
100          bb = b; \
101          cc = c;
[1]102
#if defined(TIGER_C)

/* Byte number n of x; n = 0 is the least significant byte. */
#define BN(x,n) (((x) >> (8*(n))) & 0xFF)

/* Core of one round, without the final multiplication of b:
 * fold the message word x into c, then update a from the even bytes of c
 * and b from the odd bytes of c through the four lookup tables.
 */
#define roundX(a,b,c,x) \
        c ^= x; \
        a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ t3[BN(c,4)] ^ t4[BN(c,6)]; \
        b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ t2[BN(c,5)] ^ t1[BN(c,7)];

/* Complete rounds: roundX followed by b *= 5, 7 or 9 (modulo 2^64). */
#define round5(a,b,c,x) roundX(a,b,c,x) b *= 5;
#define round7(a,b,c,x) roundX(a,b,c,x) b *= 7;
#define round9(a,b,c,x) roundX(a,b,c,x) b *= 9;

#endif
121
122
/* x86-64 GCC inline-assembly version of the round macros. It computes the
 * same c ^= x / a -= ... / b += ... update as the C variant, followed by the
 * multiplication of b by 5, 7 or 9.
 */
[18]123#ifdef TIGER_OPT_ASM
124
/* Masks selecting byte 0, 1, 2, 4, 5 and 6 of a 64-bit word. All six are
 * handed to the asm statement by roundend (operands %8..%13), but roundX
 * only references %10..%13: bytes 0, 1, 3 and 7 are extracted with
 * movzbl / movzwl / shifts instead of a mask.
 */
125#define MASK0           0xFFL
126#define MASK8           0xFF00L
127#define MASK16          0xFF0000L
128#define MASK32          0xFF00000000LL
129#define MASK40          0xFF0000000000LL
130#define MASK48          0xFF000000000000LL
131
/* Opens the asm statement. The instruction strings (roundX) follow, and
 * roundend supplies the operand lists and the closing parenthesis.
 */
132#define roundstart      __asm__ (
133
134/* a will be moved into different reg each round
135 * using register substitution feature of  GCC asm
136 * b will be moved in 2-nd pass rounds only
137 */
138
139
/* Operand lists and terminator of the asm statement opened by roundstart.
 * Operand numbering as used by the instruction strings:
 *   %0 = a, %1 = b, %2 = c      (read-write registers)
 *   %3 = a, %4 = b, %5 = c      (input registers)
 *   %6 = x                      (memory operand)
 *   %7 = &tiger_table           (register, base address of the tables)
 *   %8, %9, %10 = MASK0, MASK8, MASK16   (immediates)
 *   %11, %12, %13 = MASK32, MASK40, MASK48 (registers)
 * The last list names the scratch registers the code overwrites.
 * NOTE(review): the clobber list also contains "3" and the 32-bit name
 * "%edi" - confirm these are what was intended.
 */
140#define roundend(a,b,c,x) \
141 : "+r" (a), "+r" (b), "+r" (c) \
[46]142 : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
[18]143  "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
144 : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8"  );
145
146
147/*      c ^= x;
148        a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
149        t3[BN(c,4)] ^ t4[BN(c,6)] ;
150        b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
151        t2[BN(c,5)] ^ t1[BN(c,7)] ;     */
152
/* Instruction strings for the round core shown in the comment above.
 *  - x is loaded from memory into r8 and xor-ed into c (%2).
 *  - Bytes 2, 4, 5 and 6 of c are isolated with a mask and shifted right by
 *    (bit position - 3), which leaves byte*8, i.e. a ready-made byte offset
 *    into a table of 8-byte entries.
 *  - Bytes 0, 1, 3 and 7 are obtained as plain byte values and scaled by 8
 *    in the addressing mode.
 *  - The displacement 2048*k (256 entries of 8 bytes) selects the k-th
 *    table after the base address in %7.
 *  - rsi collects the four table words that are subtracted from a (%0),
 *    rdx collects the four table words that are added to b (%1).
 */
153#define roundX(a,b,c,x)   \
154"       movl    %10, %%ebx      \n"\
155"       movq    %11, %%rcx      \n"\
156"       movq    %13, %%rdx      \n"\
157"       movq    %6, %%r8  \n"\
158"       xorq    %%r8, %2                 \n" \
159"       andq    %2, %%rbx  \n"\
160"       andq    %2, %%rcx  \n"\
161"       andq    %2, %%rdx  \n"\
162"       shrl    $(16-3), %%ebx  \n"\
163"       shrq    $(32-3), %%rcx  \n"\
164"       shrq    $(48-3), %%rdx  \n"\
165"       movzbl  %2b, %%eax      \n"\
166"       movzwl  %2w, %%edi      \n"\
167"       movq    (%7,%%rax,8), %%rsi  \n"\
168"       shrl    $(8), %%edi  \n" \
169"       movq    %2, %%rax  \n" \
170"       xorq    (2048*1)(%7,%%rbx), %%rsi  \n"\
171"       movq    %2, %%rbx  \n"\
172"       shrl    $24, %%eax \n"\
173"       andq    %12, %%rbx      \n"\
174"       xorq    (2048*2)(%7,%%rcx), %%rsi  \n"\
175"       shrq    $(40-3), %%rbx \n"\
176"       movq    %2, %%rcx  \n"\
177"       xorq    (2048*3)(%7,%%rdx), %%rsi  \n"\
178"       movq    (2048*3)(%7,%%rdi,8), %%rdx  \n"\
179"       shrq    $56, %%rcx \n"\
180"       xorq    (2048*2)(%7,%%rax,8), %%rdx  \n"\
181"       xorq    (2048*1)(%7,%%rbx), %%rdx  \n" \
182"       subq     %%rsi, %0 \n"\
183"       xorq    (%7,%%rcx,8), %%rdx  \n"\
184"       addq     %%rdx, %1 \n"
185
/* Round with b *= 5: the leaq computes b + 4*b inside the same asm. */
186#define round5(a,b,c,x) \
187        roundstart \
188        roundX(a,b,c,x) \
189        /* b*=5; */ \
190        "leaq   (%1,%1,4), %1\n" \
191        roundend(a,b,c,x)
192
193
/* Round with b *= 7. The multiplication is a second asm statement in which
 * %0 is the new b and %1 the old b: %0 = 9*b, %1 = 2*b, %0 -= %1 gives 7*b.
 */
194#define round7(a,b,c,x) \
195        roundstart \
196        roundX(a,b,c,x) \
197        roundend(a,b,c,x) \
198        /* b*=7; */ \
199        __asm__ ( \
200        "leaq   (%1,%1,8), %0\n" \
201        "addq  %1, %1 \n" \
202        "subq  %1, %0 " \
203        :"=&r" (b): "r"(b): "1" );
204
/* Round with b *= 9: the leaq computes b + 8*b inside the same asm. */
205#define round9(a,b,c,x) \
206        roundstart \
207        roundX(a,b,c,x) \
208        "leaq   (%1,%1,8), %1\n" \
209        roundend(a,b,c,x)
210
211#endif
212
213
214
215
216/* ============== Common macros ================== */
217
/* Key schedule: transforms the eight message words x0..x7 in place before
 * the next pass. The sixteen steps run strictly in the order written
 * (left to right, top to bottom); each line is one -=, ^=, += triple.
 */
#define key_schedule \
        x0 -= x7 ^ XOR_CONST1;    x1 ^= x0;   x2 += x1; \
        x3 -= x2 ^ ((~x1)<<19);   x4 ^= x3;   x5 += x4; \
        x6 -= x5 ^ ((~x4)>>23);   x7 ^= x6;   x0 += x7; \
        x1 -= x0 ^ ((~x7)<<19);   x2 ^= x1;   x3 += x2; \
        x4 -= x3 ^ ((~x2)>>23);   x5 ^= x4;   x6 += x5; \
        x7 -= x6 ^ XOR_CONST2;
[1]235
/* Pass with multiplier 5, fused with the key schedule that follows it.
 * Each message word gets its first key-schedule step as soon as its round
 * has consumed it; the remaining eight steps run after the last round.
 */
#define pass5n(a,b,c) \
        round5(a,b,c,x0)   x0 -= x7 ^ XOR_CONST1; \
        round5(b,c,a,x1)   x1 ^= x0; \
        round5(c,a,b,x2)   x2 += x1; \
        round5(a,b,c,x3)   x3 -= x2 ^ ((~x1)<<19); \
        round5(b,c,a,x4)   x4 ^= x3; \
        round5(c,a,b,x5)   x5 += x4; \
        round5(a,b,c,x6)   x6 -= x5 ^ ((~x4)>>23); \
        round5(b,c,a,x7)   x7 ^= x6; \
        x0 += x7; \
        x1 -= x0 ^ ((~x7)<<19); \
        x2 ^= x1; \
        x3 += x2; \
        x4 -= x3 ^ ((~x2)>>23); \
        x5 ^= x4; \
        x6 += x5; \
        x7 -= x6 ^ XOR_CONST2;
261
/* Pass with multiplier 7, fused with the key schedule that follows it.
 * Each message word gets its first key-schedule step as soon as its round
 * has consumed it; the remaining eight steps run after the last round.
 */
#define pass7n(a,b,c) \
        round7(a,b,c,x0)   x0 -= x7 ^ XOR_CONST1; \
        round7(b,c,a,x1)   x1 ^= x0; \
        round7(c,a,b,x2)   x2 += x1; \
        round7(a,b,c,x3)   x3 -= x2 ^ ((~x1)<<19); \
        round7(b,c,a,x4)   x4 ^= x3; \
        round7(c,a,b,x5)   x5 += x4; \
        round7(a,b,c,x6)   x6 -= x5 ^ ((~x4)>>23); \
        round7(b,c,a,x7)   x7 ^= x6; \
        x0 += x7; \
        x1 -= x0 ^ ((~x7)<<19); \
        x2 ^= x1; \
        x3 += x2; \
        x4 -= x3 ^ ((~x2)>>23); \
        x5 ^= x4; \
        x6 += x5; \
        x7 -= x6 ^ XOR_CONST2;
287
/* Plain passes: eight rounds over x0..x7 with the roles of the three
 * registers rotating from round to round. No key schedule is included.
 */

/* Pass using the multiplier-5 round. */
#define pass5(a,b,c) \
        pass_start \
        round5(a,b,c,x0) round5(b,c,a,x1) round5(c,a,b,x2) \
        round5(a,b,c,x3) round5(b,c,a,x4) round5(c,a,b,x5) \
        round5(a,b,c,x6) round5(b,c,a,x7) \
        pass_end

/* Pass using the multiplier-7 round. */
#define pass7(a,b,c) \
        pass_start \
        round7(a,b,c,x0) round7(b,c,a,x1) round7(c,a,b,x2) \
        round7(a,b,c,x3) round7(b,c,a,x4) round7(c,a,b,x5) \
        round7(a,b,c,x6) round7(b,c,a,x7) \
        pass_end

/* Pass using the multiplier-9 round. */
#define pass9(a,b,c) \
        pass_start \
        round9(a,b,c,x0) round9(b,c,a,x1) round9(c,a,b,x2) \
        round9(a,b,c,x3) round9(b,c,a,x4) round9(c,a,b,x5) \
        round9(a,b,c,x6) round9(b,c,a,x7) \
        pass_end
324
/* Combine the result of the passes with the saved chaining values aa, bb, cc. */
#define feedforward  a ^= aa;  b -= bb;  c += cc;
[1]329
[18]330
/* Compression of one block, using the passes that have the key schedule
 * fused in. This version works with the C rounds and also with the newer
 * asm rounds, which merely spend register r8 on reloading the message word
 * from memory. Why that is not slower is unclear - perhaps write forwarding
 * beats keeping the value in a register.
 * Passes beyond the third (PASSES > 3) run in the loop.
 */
#define compress \
        save_abc \
        pass5n(a,b,c) \
        pass7n(c,a,b) \
        pass9(b,c,a) \
        pass_no = 3; \
        while (pass_no < PASSES) { \
                key_schedule \
                pass9(a,b,c) \
                tmpa = a; a = c; c = b; b = tmpa; \
                pass_no++; \
        } \
        feedforward
[1]347
/* Earlier form of the block compression: plain passes with a separate
 * key schedule between them. Passes beyond the third (PASSES > 3) run in
 * the loop.
 */
#define compress_old \
        save_abc \
        pass5(a,b,c) \
        key_schedule \
        pass7(c,a,b) \
        key_schedule \
        pass9(b,c,a) \
        pass_no = 3; \
        while (pass_no < PASSES) { \
                key_schedule \
                pass9(a,b,c) \
                tmpa = a; a = c; c = b; b = tmpa; \
                pass_no++; \
        } \
        feedforward
361
/* Compress one block of eight 64-bit words (str) into the three-word
 * chaining state (state), which is read and then overwritten.
 * The local names a, b, c, aa, bb, cc, x0..x7, tmpa and pass_no are the
 * ones the compress / pass / round macros refer to, so they must not be
 * renamed.
 */
#define tiger_compress_macro(str, state) \
{ \
  register word64 a = state[0]; \
  register word64 b = state[1]; \
  register word64 c = state[2]; \
  register word64 tmpa; \
  word64 aa, bb, cc; \
  word64 x0 = str[0], x1 = str[1], x2 = str[2], x3 = str[3]; \
  word64 x4 = str[4], x5 = str[5], x6 = str[6], x7 = str[7]; \
  int pass_no; \
\
  compress; \
\
  state[0] = a;  state[1] = b;  state[2] = c; \
}
383
[171]384void tiger_compress(const word64 *str, word64 state[3])
[1]385{
[481]386  tiger_compress_macro(((const word64*)str), ((word64*)state));
[1]387}
388
[171]389void tiger_t(const word64 *str, word64 length, word64 res[3])
[1]390{
391  register word64 i;
392
393#ifdef BIG_ENDIAN
394  register word64 j = 0;
395  unsigned char temp[64];
396#endif
397
398  /*
[18]399   * res[0]=0x0123456789ABCDEFLL;
400   * res[1]=0xFEDCBA9876543210LL;
401   * res[2]=0xF096A5B4C3B2E187LL;
402   */
[1]403
404  for(i=length; i>=64; i-=64)
405    {
406#ifdef BIG_ENDIAN
407      for(j=0; j<64; j++)
[18]408        temp[j^7] = ((sh_byte*)str)[j];
[1]409      tiger_compress(((word64*)temp), res);
410#else
411      tiger_compress(str, res);
412#endif
413      str += 8;
414    }
415}
416
[170]417void tiger(const word64 *str, word64 length, word64 res[3])
[1]418{
419  register word64 i;
420  register word64 j = 0;
[440]421  union {
[481]422    word64 w64_temp[8];
423    unsigned char temp[64];
424  } dd;
425  union {
[440]426    word64 itmp;
427    unsigned char ctmp[8];
428  } uu;
[1]429
430  /*
[18]431   * res[0]=0x0123456789ABCDEFLL;
432   * res[1]=0xFEDCBA9876543210LL;
433   * res[2]=0xF096A5B4C3B2E187LL;
434   */
[1]435
436  for(i=length; i>=64; i-=64)
437    {
438#ifdef BIG_ENDIAN
439      for(j=0; j<64; j++)
[481]440        dd.temp[j^7] = ((sh_byte*)str)[j];
441      tiger_compress((dd.w64_temp), res);
[1]442#else
443      tiger_compress(str, res);
444#endif
445      str += 8;
446    }
447
448#ifdef BIG_ENDIAN
449  for(j=0; j<i; j++)
[481]450    dd.temp[j^7] = ((sh_byte*)str)[j];
[1]451
[481]452  dd.temp[j^7] = 0x01;
[1]453  j++;
454  for(; j&7; j++)
[481]455    dd.temp[j^7] = 0;
[1]456#else
[18]457
458#ifndef USE_MEMSET
[1]459  for(j=0; j<i; j++)
[481]460    dd.temp[j] = ((const sh_byte*)str)[j];
[18]461#else
[481]462  memcpy( dd.temp, str, j=i );
[18]463#endif
[481]464  dd.temp[j++] = 0x01;
[1]465  for(; j&7; j++)
[481]466        dd.temp[j] = 0;
[18]467
[1]468#endif
[18]469
[1]470  if(j>56)
471    {
[18]472#ifndef USE_MEMSET
[1]473      for(; j<64; j++)
[481]474        dd.temp[j] = 0;
[18]475#else
[481]476      memset( (dd.temp)+j, 0, 64-j);
[18]477#endif
[481]478      tiger_compress((dd.w64_temp), res);
[1]479      j=0;
480    }
481
[18]482#ifndef USE_MEMSET
[1]483  for(; j<56; j++)
[481]484    dd.temp[j] = 0;
[18]485#else
[481]486  memset( (dd.temp)+j, 0, 56-j);
[18]487#endif
488
[440]489  /* Avoid gcc warning for type-punned pointer
490   */
491  uu.itmp = ((word64)length)<<3;
492  for (j=0; j<8; j++)
[481]493        dd.temp[56+j] = uu.ctmp[j];
[440]494
[481]495  tiger_compress((dd.w64_temp), res);
[1]496}
497
498#endif
Note: See TracBrowser for help on using the repository browser.