source: trunk/src/sh_tiger1_64.c@ 484

Last change on this file since 484 was 481, checked in by katerina, 9 years ago

Enhancements and fixes for tickets #374, #375, #376, #377, #378, and #379.

File size: 10.2 KB
RevLine 
[1]1/* Do not include ANY system headers here. The implementation is */
2/* somehow flawed - maybe something gets overlaid by definitions */
3/* in the system headers. Results will become incorrect. */
4
5#include "config_xor.h"
6
/* When compiling with clang, drop the TIGER_OPT_ASM request so that the */
/* implementation selection further down falls back to the plain C code. */
[474]7#if defined(__clang__)
8#undef TIGER_OPT_ASM
9#endif
10
[18]11#if defined(TIGER_64_BIT)
[1]12
/* For GCC 4.7 and later 4.x releases (but not clang) compile this file  */
/* at optimization level O1.                                             */
/* NOTE(review): presumably a workaround for bad code generation at      */
/* higher levels with these compiler versions - confirm.                 */
[474]13#if defined(GCC_VERSION_MAJOR) && !defined(__clang__)
[451]14#if ((GCC_VERSION_MAJOR == 4) && (GCC_VERSION_MINOR > 6))
15#pragma GCC optimize ("O1")
16#endif
17#endif
18
[474]19
[18]20/* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */
21
/* Always use the byte-wise loops in tiger(); the memcpy()/memset()      */
/* alternative would need a system header, which this file must avoid.   */
22#undef USE_MEMSET
23
/* Map WORDS_BIGENDIAN onto the BIG_ENDIAN switch that tiger_t() and     */
/* tiger() test with #ifdef.                                             */
/* NOTE(review): several system headers (e.g. <endian.h>) also define a  */
/* macro named BIG_ENDIAN on every host, whatever its byte order; this   */
/* may be what the "no system headers" warning at the top is about -     */
/* confirm.                                                              */
[1]24/* Big endian: */
25#ifdef WORDS_BIGENDIAN
26#define BIG_ENDIAN
27#endif
28
29/* Tiger: A Fast New Hash Function
30 *
31 * Ross Anderson and Eli Biham
32 *
33 * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
34 *
35 * Tiger has no usage restrictions nor patents. It can be used freely,
36 * with the reference implementation, with other implementations or with
37 * a modification to the reference implementation (as long as it still
38 * implements Tiger). We only ask you to let us know about your
39 * implementation and to cite the origin of Tiger and of the reference
40 * implementation.
41 *
42 *
43 * The authors' home pages can be found both in
44 * http://www.cs.technion.ac.il/~biham/ and in
45 * http://www.cl.cam.ac.uk/users/rja14/.
46 * The authors' email addresses are biham@cs.technion.ac.il
47 * and rja14@cl.cam.ac.uk.
48 */
49
50#if defined(HAVE_LONG_64)
51typedef unsigned long int word64;
[18]52#elif defined(HAVE_LONG_LONG_64)
53typedef unsigned long long int word64;
[1]54#else
[18]55#error No 64 bit type found !
[1]56#endif
57
58#if defined(HAVE_INT_32)
59typedef unsigned int sh_word32;
60#elif defined(HAVE_LONG_32)
61typedef unsigned long sh_word32;
62#elif defined(HAVE_SHORT_32)
63typedef unsigned short sh_word32;
64#else
[18]65#error No 32 bit type found !
[1]66#endif
67
68typedef unsigned char sh_byte;
69
/*
 * Pick the round implementation: the x86-64 inline assembly when
 * TIGER_OPT_ASM is requested, the portable C macros otherwise.
 */
#ifdef TIGER_OPT_ASM
#define TIGER_ASM64_2 1
#else
#define TIGER_C 1
#endif

/*
 * Number of passes of the hash function.
 * Three passes are recommended; use four when extra security is
 * needed.  The value must be at least three.
 */
#define PASSES 3
81
82extern word64 tiger_table[4*256];
83
[18]84/* Volatile can help if compiler is smart enough to use memory operand */
85static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
86static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
87
[1]88#define t1 (tiger_table)
89#define t2 (tiger_table+256)
90#define t3 (tiger_table+256*2)
91#define t4 (tiger_table+256*3)
92
[18]93#define pass_start
94#define pass_end
95
96
97
[1]98#define save_abc \
[18]99 aa = a; \
100 bb = b; \
101 cc = c;
[1]102
[18]103#ifdef TIGER_C
104
/* Byte number n (0 = least significant) of the word x. */
#define BN(x,n) (((x) >> (8*(n))) & 0xFF)

/*
 * Core of one round:
 *   c is mixed with the message word x,
 *   a is decreased by the XOR of the table entries selected by the
 *     even bytes of c,
 *   b is increased by the XOR of the table entries selected by the
 *     odd bytes of c.
 * The arguments must be plain lvalues; each is evaluated several times.
 */
#define roundX(a,b,c,x)                               \
    c ^= x;                                           \
    a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^                  \
         t3[BN(c,4)] ^ t4[BN(c,6)];                   \
    b += t4[BN(c,1)] ^ t3[BN(c,3)] ^                  \
         t2[BN(c,5)] ^ t1[BN(c,7)];

/* A round followed by multiplying b by 5, 7 or 9 (shift-and-add form). */
#define round5(a,b,c,x) roundX(a,b,c,x) b += b << 2;
#define round7(a,b,c,x) roundX(a,b,c,x) b = (b << 3) - b;
#define round9(a,b,c,x) roundX(a,b,c,x) b += b << 3;
119
[1]120#endif
121
122
/* x86-64 inline-assembly versions of the round macros (GCC extended    */
/* asm syntax), used instead of the C macros when TIGER_OPT_ASM is set.  */
[18]123#ifdef TIGER_OPT_ASM
124
/* Byte masks for the 64-bit word c: MASKn selects the byte that starts  */
/* at bit n.                                                             */
125#define MASK0 0xFFL
126#define MASK8 0xFF00L
127#define MASK16 0xFF0000L
128#define MASK32 0xFF00000000LL
129#define MASK40 0xFF0000000000LL
130#define MASK48 0xFF000000000000LL
131
/* Opens the asm statement; the instruction strings follow, and          */
/* roundend() supplies the operand lists and the closing parenthesis.    */
132#define roundstart __asm__ (
133
134/* a will be moved into different reg each round
135 * using register substitution feature of GCC asm
136 * b will be moved in 2-nd pass rounds only
137 */
138
139
/* Operand lists that close the asm statement opened by roundstart.      */
/* Operand numbers as used by the instruction strings:                   */
/*   %0 = a, %1 = b, %2 = c        (read-write registers)                */
/*   %3, %4, %5 = a, b, c again    (register inputs)                     */
/*   %6 = x                        (memory operand)                      */
/*   %7 = address of tiger_table   (register)                            */
/*   %8, %9, %10 = MASK0, MASK8, MASK16   (immediates)                   */
/*   %11, %12, %13 = MASK32, MASK40, MASK48 (registers)                  */
/* Clobber list: rax, rbx, rcx, rdx, rsi, edi, r8 and the entry "3".     */
/* NOTE(review): the purpose of the "3" clobber entry and of the extra   */
/* inputs %3..%5 is not evident from the instruction strings - confirm.  */
140#define roundend(a,b,c,x) \
141 : "+r" (a), "+r" (b), "+r" (c) \
[46]142 : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
[18]143 "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
144 : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" );
145
146
147/* c ^= x;
148 a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
149 t3[BN(c,4)] ^ t4[BN(c,6)] ;
150 b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
151 t2[BN(c,5)] ^ t1[BN(c,7)] ; */
152
/* Instruction strings for the round core described in the comment       */
/* above.  Bytes of c extracted with a mask are shifted right by         */
/* (bit position - 3), which leaves the byte value already multiplied    */
/* by 8 (the size of one table entry); bytes extracted with movzbl or a  */
/* plain shift use the scale factor 8 in the address instead.  The       */
/* displacement 2048*k is the byte offset of the k-th 256-entry table.   */
/* rsi collects the value subtracted from %0 (a), rdx the value added    */
/* to %1 (b).                                                            */
153#define roundX(a,b,c,x) \
154" movl %10, %%ebx \n"\
155" movq %11, %%rcx \n"\
156" movq %13, %%rdx \n"\
157" movq %6, %%r8 \n"\
158" xorq %%r8, %2 \n" \
159" andq %2, %%rbx \n"\
160" andq %2, %%rcx \n"\
161" andq %2, %%rdx \n"\
162" shrl $(16-3), %%ebx \n"\
163" shrq $(32-3), %%rcx \n"\
164" shrq $(48-3), %%rdx \n"\
165" movzbl %2b, %%eax \n"\
166" movzwl %2w, %%edi \n"\
167" movq (%7,%%rax,8), %%rsi \n"\
168" shrl $(8), %%edi \n" \
169" movq %2, %%rax \n" \
170" xorq (2048*1)(%7,%%rbx), %%rsi \n"\
171" movq %2, %%rbx \n"\
172" shrl $24, %%eax \n"\
173" andq %12, %%rbx \n"\
174" xorq (2048*2)(%7,%%rcx), %%rsi \n"\
175" shrq $(40-3), %%rbx \n"\
176" movq %2, %%rcx \n"\
177" xorq (2048*3)(%7,%%rdx), %%rsi \n"\
178" movq (2048*3)(%7,%%rdi,8), %%rdx \n"\
179" shrq $56, %%rcx \n"\
180" xorq (2048*2)(%7,%%rax,8), %%rdx \n"\
181" xorq (2048*1)(%7,%%rbx), %%rdx \n" \
182" subq %%rsi, %0 \n"\
183" xorq (%7,%%rcx,8), %%rdx \n"\
184" addq %%rdx, %1 \n"
185
/* Round core followed by b = b + b*4 inside the same asm statement.     */
186#define round5(a,b,c,x) \
187 roundstart \
188 roundX(a,b,c,x) \
189 /* b*=5; */ \
190 "leaq (%1,%1,4), %1\n" \
191 roundend(a,b,c,x)
192
193
/* Round core, then a second asm statement computing b*9 - b*2 = b*7.    */
/* NOTE(review): the second statement executes "addq %1, %1", i.e. it    */
/* writes operand %1, which is declared as a plain input ("r"(b)), and   */
/* lists "1" as a clobber.  GCC's extended-asm rules do not allow        */
/* modifying input-only operands - confirm whether this is safe.         */
194#define round7(a,b,c,x) \
195 roundstart \
196 roundX(a,b,c,x) \
197 roundend(a,b,c,x) \
198 /* b*=7; */ \
199 __asm__ ( \
200 "leaq (%1,%1,8), %0\n" \
201 "addq %1, %1 \n" \
202 "subq %1, %0 " \
203 :"=&r" (b): "r"(b): "1" );
204
/* Round core followed by b = b + b*8 inside the same asm statement.     */
205#define round9(a,b,c,x) \
206 roundstart \
207 roundX(a,b,c,x) \
208 "leaq (%1,%1,8), %1\n" \
209 roundend(a,b,c,x)
210
211#endif
212
213
214
215
216/* ============== Common macros ================== */
217
/*
 * Key schedule: transforms the eight message words x0..x7 in place
 * between passes.  Two sweeps over the words, the first seeded with
 * XOR_CONST1 and the second finished with XOR_CONST2.
 * Expands to plain statements; no trailing semicolon is needed.
 */
#define key_schedule                      \
    /* first sweep */                     \
    x0 -= x7 ^ XOR_CONST1;                \
    x1 ^= x0;                             \
    x2 += x1;                             \
    x3 -= x2 ^ (~x1 << 19);               \
    x4 ^= x3;                             \
    x5 += x4;                             \
    x6 -= x5 ^ (~x4 >> 23);               \
    x7 ^= x6;                             \
    /* second sweep */                    \
    x0 += x7;                             \
    x1 -= x0 ^ (~x7 << 19);               \
    x2 ^= x1;                             \
    x3 += x2;                             \
    x4 -= x3 ^ (~x2 >> 23);               \
    x5 ^= x4;                             \
    x6 += x5;                             \
    x7 -= x6 ^ XOR_CONST2;
[1]235
/*
 * One pass of eight rounds with the following key schedule folded in.
 * The first sweep of the key schedule is interleaved with the rounds
 * (each message word is updated right after the round that consumed
 * it); the second sweep runs after the last round.
 *
 * rnd is the round macro to apply (round5, round7, ...); a, b, c are
 * the three state words in the order the first round expects them.
 */
#define passn_generic(rnd,a,b,c)                       \
    rnd(a,b,c,x0)   x0 -= x7 ^ XOR_CONST1;             \
    rnd(b,c,a,x1)   x1 ^= x0;                          \
    rnd(c,a,b,x2)   x2 += x1;                          \
    rnd(a,b,c,x3)   x3 -= x2 ^ (~x1 << 19);            \
    rnd(b,c,a,x4)   x4 ^= x3;                          \
    rnd(c,a,b,x5)   x5 += x4;                          \
    rnd(a,b,c,x6)   x6 -= x5 ^ (~x4 >> 23);            \
    rnd(b,c,a,x7)   x7 ^= x6;                          \
    x0 += x7;                                          \
    x1 -= x0 ^ (~x7 << 19);                            \
    x2 ^= x1;                                          \
    x3 += x2;                                          \
    x4 -= x3 ^ (~x2 >> 23);                            \
    x5 ^= x4;                                          \
    x6 += x5;                                          \
    x7 -= x6 ^ XOR_CONST2;

/* Pass with multiplier 5, key schedule included. */
#define pass5n(a,b,c) passn_generic(round5,a,b,c)

/* Pass with multiplier 7, key schedule included. */
#define pass7n(a,b,c) passn_generic(round7,a,b,c)
287
/*
 * One pass of eight rounds without any key schedule work.  The state
 * words rotate roles from round to round; the message words x0..x7
 * are consumed in order.  rnd is the round macro to apply.
 */
#define pass_generic(rnd,a,b,c) \
    pass_start                  \
    rnd(a,b,c,x0)               \
    rnd(b,c,a,x1)               \
    rnd(c,a,b,x2)               \
    rnd(a,b,c,x3)               \
    rnd(b,c,a,x4)               \
    rnd(c,a,b,x5)               \
    rnd(a,b,c,x6)               \
    rnd(b,c,a,x7)               \
    pass_end

/* The three plain passes, differing only in the multiplier of the round. */
#define pass5(a,b,c) pass_generic(round5,a,b,c)
#define pass7(a,b,c) pass_generic(round7,a,b,c)
#define pass9(a,b,c) pass_generic(round9,a,b,c)
324
/* Combine the state saved by save_abc with the result of the passes. */
#define feedforward \
    a ^= aa;        \
    b -= bb;        \
    c += cc;

/*
 * Passes beyond the third (only executed when PASSES > 3): each one
 * runs the key schedule, a multiplier-9 pass, and then rotates the
 * roles of the three state words.
 */
#define extra_passes                                        \
    for (pass_no = 3; pass_no < PASSES; pass_no++) {        \
        key_schedule                                        \
        pass9(a,b,c)                                        \
        tmpa = a; a = c; c = b; b = tmpa;                   \
    }

/*
 * Compression of one block, using the passes that have the key
 * schedule folded in.
 *
 * This version works with the C round macros and also with the new
 * asm version, which merely wastes register r8.  Why it is faster is
 * unclear - perhaps write forwarding beats keeping the value in a
 * register.
 */
#define compress      \
    save_abc          \
    pass5n(a,b,c)     \
    pass7n(c,a,b)     \
    pass9(b,c,a)      \
    extra_passes      \
    feedforward

/* Earlier formulation: plain passes with explicit key schedule steps. */
#define compress_old  \
    save_abc          \
    pass5(a,b,c)      \
    key_schedule      \
    pass7(c,a,b)      \
    key_schedule      \
    pass9(b,c,a)      \
    extra_passes      \
    feedforward
361
/*
 * Body of the compression function as a brace-enclosed block.
 *
 *   str   - eight message words (read only)
 *   state - three chaining words, updated in place
 *
 * The local names (a, b, c, aa, bb, cc, x0..x7, tmpa, pass_no) are the
 * ones the compress macro and its helpers refer to, so they must not
 * be renamed.
 */
#define tiger_compress_macro(str, state)                          \
{                                                                 \
    register word64 a = (state)[0];                               \
    register word64 b = (state)[1];                               \
    register word64 c = (state)[2];                               \
    register word64 tmpa;                                         \
    word64 aa, bb, cc;                                            \
    word64 x0 = (str)[0], x1 = (str)[1], x2 = (str)[2], x3 = (str)[3]; \
    word64 x4 = (str)[4], x5 = (str)[5], x6 = (str)[6], x7 = (str)[7]; \
    int pass_no;                                                  \
                                                                  \
    compress;                                                     \
                                                                  \
    (state)[0] = a;                                               \
    (state)[1] = b;                                               \
    (state)[2] = c;                                               \
}
383
[171]384void tiger_compress(const word64 *str, word64 state[3])
[1]385{
[481]386 tiger_compress_macro(((const word64*)str), ((word64*)state));
[1]387}
388
[171]389void tiger_t(const word64 *str, word64 length, word64 res[3])
[1]390{
391 register word64 i;
392
393#ifdef BIG_ENDIAN
394 register word64 j = 0;
395 unsigned char temp[64];
396#endif
397
398 /*
[18]399 * res[0]=0x0123456789ABCDEFLL;
400 * res[1]=0xFEDCBA9876543210LL;
401 * res[2]=0xF096A5B4C3B2E187LL;
402 */
[1]403
404 for(i=length; i>=64; i-=64)
405 {
406#ifdef BIG_ENDIAN
407 for(j=0; j<64; j++)
[18]408 temp[j^7] = ((sh_byte*)str)[j];
[1]409 tiger_compress(((word64*)temp), res);
410#else
411 tiger_compress(str, res);
412#endif
413 str += 8;
414 }
415}
416
[170]417void tiger(const word64 *str, word64 length, word64 res[3])
[1]418{
419 register word64 i;
420 register word64 j = 0;
[440]421 union {
[481]422 word64 w64_temp[8];
423 unsigned char temp[64];
424 } dd;
425 union {
[440]426 word64 itmp;
427 unsigned char ctmp[8];
428 } uu;
[1]429
430 /*
[18]431 * res[0]=0x0123456789ABCDEFLL;
432 * res[1]=0xFEDCBA9876543210LL;
433 * res[2]=0xF096A5B4C3B2E187LL;
434 */
[1]435
436 for(i=length; i>=64; i-=64)
437 {
438#ifdef BIG_ENDIAN
439 for(j=0; j<64; j++)
[481]440 dd.temp[j^7] = ((sh_byte*)str)[j];
441 tiger_compress((dd.w64_temp), res);
[1]442#else
443 tiger_compress(str, res);
444#endif
445 str += 8;
446 }
447
448#ifdef BIG_ENDIAN
449 for(j=0; j<i; j++)
[481]450 dd.temp[j^7] = ((sh_byte*)str)[j];
[1]451
[481]452 dd.temp[j^7] = 0x01;
[1]453 j++;
454 for(; j&7; j++)
[481]455 dd.temp[j^7] = 0;
[1]456#else
[18]457
458#ifndef USE_MEMSET
[1]459 for(j=0; j<i; j++)
[481]460 dd.temp[j] = ((const sh_byte*)str)[j];
[18]461#else
[481]462 memcpy( dd.temp, str, j=i );
[18]463#endif
[481]464 dd.temp[j++] = 0x01;
[1]465 for(; j&7; j++)
[481]466 dd.temp[j] = 0;
[18]467
[1]468#endif
[18]469
[1]470 if(j>56)
471 {
[18]472#ifndef USE_MEMSET
[1]473 for(; j<64; j++)
[481]474 dd.temp[j] = 0;
[18]475#else
[481]476 memset( (dd.temp)+j, 0, 64-j);
[18]477#endif
[481]478 tiger_compress((dd.w64_temp), res);
[1]479 j=0;
480 }
481
[18]482#ifndef USE_MEMSET
[1]483 for(; j<56; j++)
[481]484 dd.temp[j] = 0;
[18]485#else
[481]486 memset( (dd.temp)+j, 0, 56-j);
[18]487#endif
488
[440]489 /* Avoid gcc warning for type-punned pointer
490 */
491 uu.itmp = ((word64)length)<<3;
492 for (j=0; j<8; j++)
[481]493 dd.temp[56+j] = uu.ctmp[j];
[440]494
[481]495 tiger_compress((dd.w64_temp), res);
[1]496}
497
498#endif
Note: See TracBrowser for help on using the repository browser.