source: trunk/src/sh_tiger1_64.c@ 51

Last change on this file since 51 was 46, checked in by rainer, 18 years ago

Fix x86_64 build failure with gcc 4.x (as well as some gcc 4.x warnings)

File size: 9.7 KB
Line 
1/* Do not include ANY system headers here. The implementation is */
2/* somehow flawed - maybe something gets overlayed by definitions */
3/* in the system headers. Results will become incorrect. */
4
5#include "config_xor.h"
6
7#if defined(TIGER_64_BIT)
8
9/* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */
10
11#undef USE_MEMSET
12
13/* Big endian: */
14#ifdef WORDS_BIGENDIAN
15#define BIG_ENDIAN
16#endif
17
18/* Tiger: A Fast New Hash Function
19 *
20 * Ross Anderson and Eli Biham
21 *
22 * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
23 *
24 * Tiger has no usage restrictions nor patents. It can be used freely,
25 * with the reference implementation, with other implementations or with
26 * a modification to the reference implementation (as long as it still
27 * implements Tiger). We only ask you to let us know about your
28 * implementation and to cite the origin of Tiger and of the reference
29 * implementation.
30 *
31 *
32 * The authors' home pages can be found both in
33 * http://www.cs.technion.ac.il/~biham/ and in
34 * http://www.cl.cam.ac.uk/users/rja14/.
35 * The authors' email addresses are biham@cs.technion.ac.il
36 * and rja14@cl.cam.ac.uk.
37 */
38
39#if defined(HAVE_LONG_64)
40typedef unsigned long int word64;
41#elif defined(HAVE_LONG_LONG_64)
42typedef unsigned long long int word64;
43#else
44#error No 64 bit type found !
45#endif
46
47#if defined(HAVE_INT_32)
48typedef unsigned int sh_word32;
49#elif defined(HAVE_LONG_32)
50typedef unsigned long sh_word32;
51#elif defined(HAVE_SHORT_32)
52typedef unsigned short sh_word32;
53#else
54#error No 32 bit type found !
55#endif
56
57typedef unsigned char sh_byte;
58
59#if defined(TIGER_OPT_ASM)
60#define TIGER_ASM64_2 1
61#else
62#define TIGER_C 1
63#endif
64
65/* The number of passes of the hash function. */
66/* Three passes are recommended. */
67/* Use four passes when you need extra security. */
68/* Must be at least three. */
69#define PASSES 3
70
71extern word64 tiger_table[4*256];
72
73/* Volatile can help if compiler is smart enough to use memory operand */
74static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
75static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
76
77#define t1 (tiger_table)
78#define t2 (tiger_table+256)
79#define t3 (tiger_table+256*2)
80#define t4 (tiger_table+256*3)
81
82#define pass_start
83#define pass_end
84
85
86
87#define save_abc \
88 aa = a; \
89 bb = b; \
90 cc = c;
91
92#ifdef TIGER_C
93
94#define BN(x,n) (((x)>>((n)*8))&0xFF)
95
96
97/* Depending on outer code one of these two can be better*/
98#define roundX(a,b,c,x) \
99 c ^= x; \
100 a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ \
101 t3[BN(c,4)] ^ t4[BN(c,6)] ; \
102 b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ \
103 t2[BN(c,5)] ^ t1[BN(c,7)] ;
104
105#define round5(a,b,c,x) roundX(a,b,c,x) b = b+b*4;
106#define round7(a,b,c,x) roundX(a,b,c,x) b = b*8-b;
107#define round9(a,b,c,x) roundX(a,b,c,x) b = b+b*8;
108
109#endif
110
111
112#ifdef TIGER_OPT_ASM
113
114#define MASK0 0xFFL
115#define MASK8 0xFF00L
116#define MASK16 0xFF0000L
117#define MASK32 0xFF00000000LL
118#define MASK40 0xFF0000000000LL
119#define MASK48 0xFF000000000000LL
120
121#define roundstart __asm__ (
122
123/* a will be moved into different reg each round
124 * using register substitution feature of GCC asm
125 * b will be moved in 2-nd pass rounds only
126 */
127
128
129#define roundend(a,b,c,x) \
130 : "+r" (a), "+r" (b), "+r" (c) \
131 : "r" (a), "r" (b), "r" (c), "m" (x), "r" (&tiger_table),\
132 "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
133 : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" );
134
135
136/* c ^= x;
137 a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
138 t3[BN(c,4)] ^ t4[BN(c,6)] ;
139 b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
140 t2[BN(c,5)] ^ t1[BN(c,7)] ; */
141
142#define roundX(a,b,c,x) \
143" movl %10, %%ebx \n"\
144" movq %11, %%rcx \n"\
145" movq %13, %%rdx \n"\
146" movq %6, %%r8 \n"\
147" xorq %%r8, %2 \n" \
148" andq %2, %%rbx \n"\
149" andq %2, %%rcx \n"\
150" andq %2, %%rdx \n"\
151" shrl $(16-3), %%ebx \n"\
152" shrq $(32-3), %%rcx \n"\
153" shrq $(48-3), %%rdx \n"\
154" movzbl %2b, %%eax \n"\
155" movzwl %2w, %%edi \n"\
156" movq (%7,%%rax,8), %%rsi \n"\
157" shrl $(8), %%edi \n" \
158" movq %2, %%rax \n" \
159" xorq (2048*1)(%7,%%rbx), %%rsi \n"\
160" movq %2, %%rbx \n"\
161" shrl $24, %%eax \n"\
162" andq %12, %%rbx \n"\
163" xorq (2048*2)(%7,%%rcx), %%rsi \n"\
164" shrq $(40-3), %%rbx \n"\
165" movq %2, %%rcx \n"\
166" xorq (2048*3)(%7,%%rdx), %%rsi \n"\
167" movq (2048*3)(%7,%%rdi,8), %%rdx \n"\
168" shrq $56, %%rcx \n"\
169" xorq (2048*2)(%7,%%rax,8), %%rdx \n"\
170" xorq (2048*1)(%7,%%rbx), %%rdx \n" \
171" subq %%rsi, %0 \n"\
172" xorq (%7,%%rcx,8), %%rdx \n"\
173" addq %%rdx, %1 \n"
174
175#define round5(a,b,c,x) \
176 roundstart \
177 roundX(a,b,c,x) \
178 /* b*=5; */ \
179 "leaq (%1,%1,4), %1\n" \
180 roundend(a,b,c,x)
181
182
183#define round7(a,b,c,x) \
184 roundstart \
185 roundX(a,b,c,x) \
186 roundend(a,b,c,x) \
187 /* b*=7; */ \
188 __asm__ ( \
189 "leaq (%1,%1,8), %0\n" \
190 "addq %1, %1 \n" \
191 "subq %1, %0 " \
192 :"=&r" (b): "r"(b): "1" );
193
194#define round9(a,b,c,x) \
195 roundstart \
196 roundX(a,b,c,x) \
197 "leaq (%1,%1,8), %1\n" \
198 roundend(a,b,c,x)
199
200#endif
201
202
203
204
205/* ============== Common macros ================== */
206
207#define key_schedule \
208 x0 -= x7 ^ XOR_CONST1; \
209 x1 ^= x0; \
210 x2 += x1;\
211 x3 -= x2 ^ ((~x1)<<19);\
212 x4 ^= x3;\
213 x5 += x4;\
214 x6 -= x5 ^ ((~x4)>>23); \
215 x7 ^= x6; \
216 x0 += x7; \
217 x1 -= x0 ^ ((~x7)<<19); \
218 x2 ^= x1; \
219 x3 += x2; \
220 x4 -= x3 ^ ((~x2)>>23); \
221 x5 ^= x4; \
222 x6 += x5; \
223 x7 -= x6 ^ XOR_CONST2;
224
225#define pass5n(a,b,c) \
226 round5(a,b,c,x0) \
227 x0 -= x7 ^ XOR_CONST1; \
228 round5(b,c,a,x1) \
229 x1 ^= x0; \
230 round5(c,a,b,x2) \
231 x2 += x1; \
232 round5(a,b,c,x3) \
233 x3 -= x2 ^ ((~x1)<<19); \
234 round5(b,c,a,x4) \
235 x4 ^= x3; \
236 round5(c,a,b,x5) \
237 x5 += x4; \
238 round5(a,b,c,x6) \
239 x6 -= x5 ^ ((~x4)>>23); \
240 round5(b,c,a,x7) \
241 x7 ^= x6; \
242 x0 += x7; \
243 x1 -= x0 ^ ((~x7)<<19); \
244 x2 ^= x1; \
245 x3 += x2; \
246 x4 -= x3 ^ ((~x2)>>23); \
247 x5 ^= x4; \
248 x6 += x5; \
249 x7 -= x6 ^ XOR_CONST2;
250
251#define pass7n(a,b,c) \
252 round7(a,b,c,x0) \
253 x0 -= x7 ^ XOR_CONST1; \
254 round7(b,c,a,x1) \
255 x1 ^= x0; \
256 round7(c,a,b,x2) \
257 x2 += x1; \
258 round7(a,b,c,x3) \
259 x3 -= x2 ^ ((~x1)<<19); \
260 round7(b,c,a,x4) \
261 x4 ^= x3; \
262 round7(c,a,b,x5) \
263 x5 += x4; \
264 round7(a,b,c,x6) \
265 x6 -= x5 ^ ((~x4)>>23); \
266 round7(b,c,a,x7) \
267 x7 ^= x6; \
268 x0 += x7; \
269 x1 -= x0 ^ ((~x7)<<19); \
270 x2 ^= x1; \
271 x3 += x2; \
272 x4 -= x3 ^ ((~x2)>>23); \
273 x5 ^= x4; \
274 x6 += x5; \
275 x7 -= x6 ^ XOR_CONST2;
276
277#define pass5(a,b,c) \
278 pass_start \
279 round5(a,b,c,x0) \
280 round5(b,c,a,x1) \
281 round5(c,a,b,x2) \
282 round5(a,b,c,x3) \
283 round5(b,c,a,x4) \
284 round5(c,a,b,x5) \
285 round5(a,b,c,x6) \
286 round5(b,c,a,x7) \
287 pass_end
288
289#define pass7(a,b,c) \
290 pass_start \
291 round7(a,b,c,x0) \
292 round7(b,c,a,x1) \
293 round7(c,a,b,x2) \
294 round7(a,b,c,x3) \
295 round7(b,c,a,x4) \
296 round7(c,a,b,x5) \
297 round7(a,b,c,x6) \
298 round7(b,c,a,x7) \
299 pass_end
300
301
302#define pass9(a,b,c) \
303 pass_start \
304 round9(a,b,c,x0) \
305 round9(b,c,a,x1) \
306 round9(c,a,b,x2) \
307 round9(a,b,c,x3) \
308 round9(b,c,a,x4) \
309 round9(c,a,b,x5) \
310 round9(a,b,c,x6) \
311 round9(b,c,a,x7) \
312 pass_end
313
314#define feedforward \
315 a ^= aa; \
316 b -= bb; \
317 c += cc;
318
319
320/* This version works ok with C variant and also with new asm version
321 * that just wastes a register r8
322 * reason? who knows, write forwarding is faster than keeping value
323 * in register? :)
324 */
325#define compress \
326 save_abc \
327 pass5n(a,b,c) \
328 pass7n(c,a,b) \
329 pass9(b,c,a) \
330 for(pass_no=3; pass_no<PASSES; pass_no++) { \
331 key_schedule \
332 pass9(a,b,c) \
333 tmpa=a; a=c; c=b; b=tmpa; \
334 } \
335 feedforward
336
337#define compress_old \
338 save_abc \
339 pass5(a,b,c) \
340 key_schedule \
341 pass7(c,a,b) \
342 key_schedule \
343 pass9(b,c,a) \
344 for(pass_no=3; pass_no<PASSES; pass_no++) { \
345 key_schedule \
346 pass9(a,b,c) \
347 tmpa=a; a=c; c=b; b=tmpa; \
348 } \
349 feedforward
350
351#define tiger_compress_macro(str, state) \
352{ \
353 register word64 a, b, c; \
354 register word64 tmpa; \
355 word64 aa, bb, cc; \
356 word64 x0, x1, x2, x3, x4, x5, x6, x7; \
357 int pass_no; \
358\
359 a = state[0]; \
360 b = state[1]; \
361 c = state[2]; \
362\
363 x0=str[0]; x1=str[1]; x2=str[2]; x3=str[3]; \
364 x4=str[4]; x5=str[5]; x6=str[6]; x7=str[7]; \
365\
366 compress; \
367\
368 state[0] = a; \
369 state[1] = b; \
370 state[2] = c; \
371}
372
373void tiger_compress(word64 *str, word64 state[3])
374{
375 tiger_compress_macro(((word64*)str), ((word64*)state));
376}
377
378void tiger_t(word64 *str, word64 length, word64 res[3])
379{
380 register word64 i;
381
382#ifdef BIG_ENDIAN
383 register word64 j = 0;
384 unsigned char temp[64];
385#endif
386
387 /*
388 * res[0]=0x0123456789ABCDEFLL;
389 * res[1]=0xFEDCBA9876543210LL;
390 * res[2]=0xF096A5B4C3B2E187LL;
391 */
392
393 for(i=length; i>=64; i-=64)
394 {
395#ifdef BIG_ENDIAN
396 for(j=0; j<64; j++)
397 temp[j^7] = ((sh_byte*)str)[j];
398 tiger_compress(((word64*)temp), res);
399#else
400 tiger_compress(str, res);
401#endif
402 str += 8;
403 }
404}
405
406void tiger(word64 *str, word64 length, word64 res[3])
407{
408 register word64 i;
409 register word64 j = 0;
410 unsigned char temp[64];
411
412 /*
413 * res[0]=0x0123456789ABCDEFLL;
414 * res[1]=0xFEDCBA9876543210LL;
415 * res[2]=0xF096A5B4C3B2E187LL;
416 */
417
418 for(i=length; i>=64; i-=64)
419 {
420#ifdef BIG_ENDIAN
421 for(j=0; j<64; j++)
422 temp[j^7] = ((sh_byte*)str)[j];
423 tiger_compress(((word64*)temp), res);
424#else
425 tiger_compress(str, res);
426#endif
427 str += 8;
428 }
429
430#ifdef BIG_ENDIAN
431 for(j=0; j<i; j++)
432 temp[j^7] = ((sh_byte*)str)[j];
433
434 temp[j^7] = 0x01;
435 j++;
436 for(; j&7; j++)
437 temp[j^7] = 0;
438#else
439
440#ifndef USE_MEMSET
441 for(j=0; j<i; j++)
442 temp[j] = ((sh_byte*)str)[j];
443#else
444 memcpy( temp, str, j=i );
445#endif
446 temp[j++] = 0x01;
447 for(; j&7; j++)
448 temp[j] = 0;
449
450#endif
451
452 if(j>56)
453 {
454#ifndef USE_MEMSET
455 for(; j<64; j++)
456 temp[j] = 0;
457#else
458 memset( temp+j, 0, 64-j);
459#endif
460 tiger_compress(((word64*)temp), res);
461 j=0;
462 }
463
464#ifndef USE_MEMSET
465 for(; j<56; j++)
466 temp[j] = 0;
467#else
468 memset( temp+j, 0, 56-j);
469#endif
470
471 ((word64*)(&(temp[56])))[0] = ((word64)length)<<3;
472 tiger_compress(((word64*)temp), res);
473}
474
475#endif
Note: See TracBrowser for help on using the repository browser.