source: trunk/src/sh_tiger1_64.c@ 45

Last change on this file since 45 was 18, checked in by rainer, 19 years ago

Optimized version of tiger algorithm, and basic ingredients for unit testing (part 2)

File size: 9.7 KB
RevLine 
[1]1/* Do not include ANY system headers here. The implementation is */
2/* somehow flawed - maybe something gets overlayed by definitions */
3/* in the system headers. Results will become incorrect. */
4
5#include "config_xor.h"
6
[18]7#if defined(TIGER_64_BIT)
[1]8
[18]9/* #if defined(HAVE_LONG_64) || defined(HAVE_LONG_LONG_64) */
10
11#undef USE_MEMSET
12
[1]13/* Big endian: */
14#ifdef WORDS_BIGENDIAN
15#define BIG_ENDIAN
16#endif
17
18/* Tiger: A Fast New Hash Function
19 *
20 * Ross Anderson and Eli Biham
21 *
22 * From the homepage (http://www.cs.technion.ac.il/~biham/Reports/Tiger/):
23 *
24 * Tiger has no usage restrictions nor patents. It can be used freely,
25 * with the reference implementation, with other implementations or with
26 * a modification to the reference implementation (as long as it still
27 * implements Tiger). We only ask you to let us know about your
28 * implementation and to cite the origin of Tiger and of the reference
29 * implementation.
30 *
31 *
32 * The authors' home pages can be found both in
33 * http://www.cs.technion.ac.il/~biham/ and in
34 * http://www.cl.cam.ac.uk/users/rja14/.
35 * The authors' email addresses are biham@cs.technion.ac.il
36 * and rja14@cl.cam.ac.uk.
37 */
38
39#if defined(HAVE_LONG_64)
40typedef unsigned long int word64;
[18]41#elif defined(HAVE_LONG_LONG_64)
42typedef unsigned long long int word64;
[1]43#else
[18]44#error No 64 bit type found !
[1]45#endif
46
47#if defined(HAVE_INT_32)
48typedef unsigned int sh_word32;
49#elif defined(HAVE_LONG_32)
50typedef unsigned long sh_word32;
51#elif defined(HAVE_SHORT_32)
52typedef unsigned short sh_word32;
53#else
[18]54#error No 32 bit type found !
[1]55#endif
56
57typedef unsigned char sh_byte;
58
[18]59#if defined(TIGER_OPT_ASM)
60#define TIGER_ASM64_2 1
61#else
62#define TIGER_C 1
[1]63#endif
64
[18]65/* The number of passes of the hash function. */
66/* Three passes are recommended. */
67/* Use four passes when you need extra security. */
68/* Must be at least three. */
[1]69#define PASSES 3
70
71extern word64 tiger_table[4*256];
72
[18]73/* Volatile can help if compiler is smart enough to use memory operand */
74static /*volatile*/ const word64 XOR_CONST1=0xA5A5A5A5A5A5A5A5LL;
75static /*volatile*/ const word64 XOR_CONST2=0x0123456789ABCDEFLL;
76
[1]77#define t1 (tiger_table)
78#define t2 (tiger_table+256)
79#define t3 (tiger_table+256*2)
80#define t4 (tiger_table+256*3)
81
[18]82#define pass_start
83#define pass_end
84
85
86
[1]87#define save_abc \
[18]88 aa = a; \
89 bb = b; \
90 cc = c;
[1]91
[18]92#ifdef TIGER_C
93
94#define BN(x,n) (((x)>>((n)*8))&0xFF)
95
96
97/* Depending on outer code one of these two can be better*/
98#define roundX(a,b,c,x) \
99 c ^= x; \
100 a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^ \
101 t3[BN(c,4)] ^ t4[BN(c,6)] ; \
102 b += t4[BN(c,1)] ^ t3[BN(c,3)] ^ \
103 t2[BN(c,5)] ^ t1[BN(c,7)] ;
104
105#define round5(a,b,c,x) roundX(a,b,c,x) b = b+b*4;
106#define round7(a,b,c,x) roundX(a,b,c,x) b = b*8-b;
107#define round9(a,b,c,x) roundX(a,b,c,x) b = b+b*8;
108
[1]109#endif
110
111
[18]112#ifdef TIGER_OPT_ASM
113
114#define MASK0 0xFFL
115#define MASK8 0xFF00L
116#define MASK16 0xFF0000L
117#define MASK32 0xFF00000000LL
118#define MASK40 0xFF0000000000LL
119#define MASK48 0xFF000000000000LL
120
121#define roundstart __asm__ (
122
123/* a will be moved into different reg each round
124 * using register substitution feature of GCC asm
125 * b will be moved in 2-nd pass rounds only
126 */
127
128
129#define roundend(a,b,c,x) \
130 : "+r" (a), "+r" (b), "+r" (c) \
131 : "0" (a), "1" (b), "2" (c), "m" (x), "r" (&tiger_table),\
132 "i" (MASK0), "i" (MASK8), "i" (MASK16), "r" (MASK32), "r" (MASK40), "r" (MASK48) \
133 : "3", "%rax","%rbx","%rcx","%rdx","%rsi", "%edi", "%r8" );
134
135
136/* c ^= x;
137 a -= t1[BN(c,0)] ^ t2[BN(c,2)] ^
138 t3[BN(c,4)] ^ t4[BN(c,6)] ;
139 b += t4[BN(c,1)] ^ t3[BN(c,3)] ^
140 t2[BN(c,5)] ^ t1[BN(c,7)] ; */
141
142#define roundX(a,b,c,x) \
143" movl %10, %%ebx \n"\
144" movq %11, %%rcx \n"\
145" movq %13, %%rdx \n"\
146" movq %6, %%r8 \n"\
147" xorq %%r8, %2 \n" \
148" andq %2, %%rbx \n"\
149" andq %2, %%rcx \n"\
150" andq %2, %%rdx \n"\
151" shrl $(16-3), %%ebx \n"\
152" shrq $(32-3), %%rcx \n"\
153" shrq $(48-3), %%rdx \n"\
154" movzbl %2b, %%eax \n"\
155" movzwl %2w, %%edi \n"\
156" movq (%7,%%rax,8), %%rsi \n"\
157" shrl $(8), %%edi \n" \
158" movq %2, %%rax \n" \
159" xorq (2048*1)(%7,%%rbx), %%rsi \n"\
160" movq %2, %%rbx \n"\
161" shrl $24, %%eax \n"\
162" andq %12, %%rbx \n"\
163" xorq (2048*2)(%7,%%rcx), %%rsi \n"\
164" shrq $(40-3), %%rbx \n"\
165" movq %2, %%rcx \n"\
166" xorq (2048*3)(%7,%%rdx), %%rsi \n"\
167" movq (2048*3)(%7,%%rdi,8), %%rdx \n"\
168" shrq $56, %%rcx \n"\
169" xorq (2048*2)(%7,%%rax,8), %%rdx \n"\
170" xorq (2048*1)(%7,%%rbx), %%rdx \n" \
171" subq %%rsi, %0 \n"\
172" xorq (%7,%%rcx,8), %%rdx \n"\
173" addq %%rdx, %1 \n"
174
175#define round5(a,b,c,x) \
176 roundstart \
177 roundX(a,b,c,x) \
178 /* b*=5; */ \
179 "leaq (%1,%1,4), %1\n" \
180 roundend(a,b,c,x)
181
182
183#define round7(a,b,c,x) \
184 roundstart \
185 roundX(a,b,c,x) \
186 roundend(a,b,c,x) \
187 /* b*=7; */ \
188 __asm__ ( \
189 "leaq (%1,%1,8), %0\n" \
190 "addq %1, %1 \n" \
191 "subq %1, %0 " \
192 :"=&r" (b): "r"(b): "1" );
193
194#define round9(a,b,c,x) \
195 roundstart \
196 roundX(a,b,c,x) \
197 "leaq (%1,%1,8), %1\n" \
198 roundend(a,b,c,x)
199
200#endif
201
202
203
204
205/* ============== Common macros ================== */
206
[1]207#define key_schedule \
[18]208 x0 -= x7 ^ XOR_CONST1; \
209 x1 ^= x0; \
210 x2 += x1;\
211 x3 -= x2 ^ ((~x1)<<19);\
212 x4 ^= x3;\
213 x5 += x4;\
214 x6 -= x5 ^ ((~x4)>>23); \
215 x7 ^= x6; \
216 x0 += x7; \
217 x1 -= x0 ^ ((~x7)<<19); \
218 x2 ^= x1; \
219 x3 += x2; \
220 x4 -= x3 ^ ((~x2)>>23); \
221 x5 ^= x4; \
222 x6 += x5; \
223 x7 -= x6 ^ XOR_CONST2;
[1]224
[18]225#define pass5n(a,b,c) \
226 round5(a,b,c,x0) \
227 x0 -= x7 ^ XOR_CONST1; \
228 round5(b,c,a,x1) \
229 x1 ^= x0; \
230 round5(c,a,b,x2) \
231 x2 += x1; \
232 round5(a,b,c,x3) \
233 x3 -= x2 ^ ((~x1)<<19); \
234 round5(b,c,a,x4) \
235 x4 ^= x3; \
236 round5(c,a,b,x5) \
237 x5 += x4; \
238 round5(a,b,c,x6) \
239 x6 -= x5 ^ ((~x4)>>23); \
240 round5(b,c,a,x7) \
241 x7 ^= x6; \
242 x0 += x7; \
243 x1 -= x0 ^ ((~x7)<<19); \
244 x2 ^= x1; \
245 x3 += x2; \
246 x4 -= x3 ^ ((~x2)>>23); \
247 x5 ^= x4; \
248 x6 += x5; \
249 x7 -= x6 ^ XOR_CONST2;
250
251#define pass7n(a,b,c) \
252 round7(a,b,c,x0) \
253 x0 -= x7 ^ XOR_CONST1; \
254 round7(b,c,a,x1) \
255 x1 ^= x0; \
256 round7(c,a,b,x2) \
257 x2 += x1; \
258 round7(a,b,c,x3) \
259 x3 -= x2 ^ ((~x1)<<19); \
260 round7(b,c,a,x4) \
261 x4 ^= x3; \
262 round7(c,a,b,x5) \
263 x5 += x4; \
264 round7(a,b,c,x6) \
265 x6 -= x5 ^ ((~x4)>>23); \
266 round7(b,c,a,x7) \
267 x7 ^= x6; \
268 x0 += x7; \
269 x1 -= x0 ^ ((~x7)<<19); \
270 x2 ^= x1; \
271 x3 += x2; \
272 x4 -= x3 ^ ((~x2)>>23); \
273 x5 ^= x4; \
274 x6 += x5; \
275 x7 -= x6 ^ XOR_CONST2;
276
277#define pass5(a,b,c) \
278 pass_start \
279 round5(a,b,c,x0) \
280 round5(b,c,a,x1) \
281 round5(c,a,b,x2) \
282 round5(a,b,c,x3) \
283 round5(b,c,a,x4) \
284 round5(c,a,b,x5) \
285 round5(a,b,c,x6) \
286 round5(b,c,a,x7) \
287 pass_end
288
289#define pass7(a,b,c) \
290 pass_start \
291 round7(a,b,c,x0) \
292 round7(b,c,a,x1) \
293 round7(c,a,b,x2) \
294 round7(a,b,c,x3) \
295 round7(b,c,a,x4) \
296 round7(c,a,b,x5) \
297 round7(a,b,c,x6) \
298 round7(b,c,a,x7) \
299 pass_end
300
301
302#define pass9(a,b,c) \
303 pass_start \
304 round9(a,b,c,x0) \
305 round9(b,c,a,x1) \
306 round9(c,a,b,x2) \
307 round9(a,b,c,x3) \
308 round9(b,c,a,x4) \
309 round9(c,a,b,x5) \
310 round9(a,b,c,x6) \
311 round9(b,c,a,x7) \
312 pass_end
313
[1]314#define feedforward \
[18]315 a ^= aa; \
316 b -= bb; \
317 c += cc;
[1]318
[18]319
320/* This version works ok with C variant and also with new asm version
321 * that just wastes a register r8
322 * reason? who knows, write forwarding is faster than keeping value
323 * in register? :)
324 */
[1]325#define compress \
[18]326 save_abc \
327 pass5n(a,b,c) \
328 pass7n(c,a,b) \
329 pass9(b,c,a) \
330 for(pass_no=3; pass_no<PASSES; pass_no++) { \
331 key_schedule \
332 pass9(a,b,c) \
333 tmpa=a; a=c; c=b; b=tmpa; \
334 } \
335 feedforward
[1]336
[18]337#define compress_old \
338 save_abc \
339 pass5(a,b,c) \
340 key_schedule \
341 pass7(c,a,b) \
342 key_schedule \
343 pass9(b,c,a) \
344 for(pass_no=3; pass_no<PASSES; pass_no++) { \
345 key_schedule \
346 pass9(a,b,c) \
347 tmpa=a; a=c; c=b; b=tmpa; \
348 } \
349 feedforward
350
[1]351#define tiger_compress_macro(str, state) \
352{ \
[18]353 register word64 a, b, c; \
354 register word64 tmpa; \
[1]355 word64 aa, bb, cc; \
[18]356 word64 x0, x1, x2, x3, x4, x5, x6, x7; \
[1]357 int pass_no; \
358\
359 a = state[0]; \
360 b = state[1]; \
361 c = state[2]; \
362\
363 x0=str[0]; x1=str[1]; x2=str[2]; x3=str[3]; \
364 x4=str[4]; x5=str[5]; x6=str[6]; x7=str[7]; \
365\
366 compress; \
367\
368 state[0] = a; \
369 state[1] = b; \
370 state[2] = c; \
371}
372
373void tiger_compress(word64 *str, word64 state[3])
374{
375 tiger_compress_macro(((word64*)str), ((word64*)state));
376}
377
378void tiger_t(word64 *str, word64 length, word64 res[3])
379{
380 register word64 i;
381
382#ifdef BIG_ENDIAN
383 register word64 j = 0;
384 unsigned char temp[64];
385#endif
386
387 /*
[18]388 * res[0]=0x0123456789ABCDEFLL;
389 * res[1]=0xFEDCBA9876543210LL;
390 * res[2]=0xF096A5B4C3B2E187LL;
391 */
[1]392
393 for(i=length; i>=64; i-=64)
394 {
395#ifdef BIG_ENDIAN
396 for(j=0; j<64; j++)
[18]397 temp[j^7] = ((sh_byte*)str)[j];
[1]398 tiger_compress(((word64*)temp), res);
399#else
400 tiger_compress(str, res);
401#endif
402 str += 8;
403 }
404}
405
406void tiger(word64 *str, word64 length, word64 res[3])
407{
408 register word64 i;
409 register word64 j = 0;
410 unsigned char temp[64];
411
412 /*
[18]413 * res[0]=0x0123456789ABCDEFLL;
414 * res[1]=0xFEDCBA9876543210LL;
415 * res[2]=0xF096A5B4C3B2E187LL;
416 */
[1]417
418 for(i=length; i>=64; i-=64)
419 {
420#ifdef BIG_ENDIAN
421 for(j=0; j<64; j++)
[18]422 temp[j^7] = ((sh_byte*)str)[j];
[1]423 tiger_compress(((word64*)temp), res);
424#else
425 tiger_compress(str, res);
426#endif
427 str += 8;
428 }
429
430#ifdef BIG_ENDIAN
431 for(j=0; j<i; j++)
432 temp[j^7] = ((sh_byte*)str)[j];
433
434 temp[j^7] = 0x01;
435 j++;
436 for(; j&7; j++)
437 temp[j^7] = 0;
438#else
[18]439
440#ifndef USE_MEMSET
[1]441 for(j=0; j<i; j++)
442 temp[j] = ((sh_byte*)str)[j];
[18]443#else
444 memcpy( temp, str, j=i );
445#endif
[1]446 temp[j++] = 0x01;
447 for(; j&7; j++)
[18]448 temp[j] = 0;
449
[1]450#endif
[18]451
[1]452 if(j>56)
453 {
[18]454#ifndef USE_MEMSET
[1]455 for(; j<64; j++)
456 temp[j] = 0;
[18]457#else
458 memset( temp+j, 0, 64-j);
459#endif
[1]460 tiger_compress(((word64*)temp), res);
461 j=0;
462 }
463
[18]464#ifndef USE_MEMSET
[1]465 for(; j<56; j++)
466 temp[j] = 0;
[18]467#else
468 memset( temp+j, 0, 56-j);
469#endif
470
[1]471 ((word64*)(&(temp[56])))[0] = ((word64)length)<<3;
472 tiger_compress(((word64*)temp), res);
473}
474
475#endif
Note: See TracBrowser for help on using the repository browser.