Changeset 18 for trunk/src/sh_tiger1.c


Ignore:
Timestamp:
Jan 28, 2006, 9:07:52 PM (19 years ago)
Author:
rainer
Message:

Optimized version of tiger algorithm, and basic ingredients for unit testing (part 2)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/sh_tiger1.c

    r1 r18  
    55#include "config_xor.h"
    66
    7 
    8 #if !defined(HAVE_LONG_64) && !defined(HAVE_LONG_LONG_64)
     7/* The compress step is already inlined (as a macro) in the function used for file checksums, so UNROLL_COMPRESS stays undefined. */
     8/* #define UNROLL_COMPRESS */
     9#undef UNROLL_COMPRESS
     10
     11#if !defined(TIGER_64_BIT)
    912
    1013/* Tiger: A Fast New Hash Function
     
    3740typedef unsigned short sh_word32;
    3841#else
    39 #error No 32 byte type found !
     42#error No 32 bit type found !
    4043#endif
    4144
     
    4649#define BIG_ENDIAN
    4750#endif
    48 
    49 /* NOTE that this code is NOT FULLY OPTIMIZED for any  */
    50 /* machine. Assembly code might be much faster on some */
    51 /* machines, especially if the code is compiled with   */
    52 /* gcc.                                                */
    5351
    5452/* The number of passes of the hash function.          */
     
    6563#define t4 (tiger_table+256*3)
    6664
    67 #define sh_sub64(s0, s1, p0, p1) \
     65#define sub64(s0, s1, p0, p1) \
    6866      temps0 = (p0); \
    6967      tcarry = s0 < temps0; \
     
    7169      s1 -= (p1) + tcarry;
    7270
    73 #define sh_add64(s0, s1, p0, p1) \
     71#define add64(s0, s1, p0, p1) \
    7472      temps0 = (p0); \
    7573      s0 += temps0; \
     
    7775      s1 += (p1) + tcarry;
    7876
    79 #define sh_xor64(s0, s1, p0, p1) \
     77#define xor64(s0, s1, p0, p1) \
    8078      s0 ^= (p0); \
    8179      s1 ^= (p1);
    8280
    83 #define sh_mul5(s0, s1) \
     81#define mul5(s0, s1) \
    8482      tempt0 = s0<<2; \
    8583      tempt1 = (s1<<2)|(s0>>30); \
    86       sh_add64(s0, s1, tempt0, tempt1);
    87 
    88 #define sh_mul7(s0, s1) \
     84      add64(s0, s1, tempt0, tempt1);
     85
     86#define mul7(s0, s1) \
    8987      tempt0 = s0<<3; \
    9088      tempt1 = (s1<<3)|(s0>>29); \
    91       sh_sub64(tempt0, tempt1, s0, s1); \
     89      sub64(tempt0, tempt1, s0, s1); \
    9290      s0 = tempt0; \
    9391      s1 = tempt1;
    9492
    95 #define sh_mul9(s0, s1) \
     93#define mul9(s0, s1) \
    9694      tempt0 = s0<<3; \
    9795      tempt1 = (s1<<3)|(s0>>29); \
    98       sh_add64(s0, s1, tempt0, tempt1);
    99 
    100 #define sh_save_abc \
     96      add64(s0, s1, tempt0, tempt1);
     97
     98#define save_abc \
    10199      aa0 = a0; \
    102100      aa1 = a1; \
     
    106104      cc1 = c1;
    107105
    108 #define sh_round(a0,a1,b0,b1,c0,c1,x0,x1,mul) \
    109       sh_xor64(c0, c1, x0, x1); \
     106#define roundX(a0,a1,b0,b1,c0,c1,x0,x1) \
     107      xor64(c0, c1, x0, x1); \
    110108      temp0  = t1[((c0)>>(0*8))&0xFF][0] ; \
    111109      temp1  = t1[((c0)>>(0*8))&0xFF][1] ; \
     
    116114      temp0 ^= t4[((c1)>>(2*8))&0xFF][0] ; \
    117115      temp1 ^= t4[((c1)>>(2*8))&0xFF][1] ; \
    118       sh_sub64(a0, a1, temp0, temp1); \
     116      sub64(a0, a1, temp0, temp1); \
    119117      temp0  = t4[((c0)>>(1*8))&0xFF][0] ; \
    120118      temp1  = t4[((c0)>>(1*8))&0xFF][1] ; \
     
    125123      temp0 ^= t1[((c1)>>(3*8))&0xFF][0] ; \
    126124      temp1 ^= t1[((c1)>>(3*8))&0xFF][1] ; \
    127       sh_add64(b0, b1, temp0, temp1); \
    128       if((mul)==5) \
    129         {sh_mul5(b0, b1);} \
    130       else \
    131         if((mul)==7) \
    132           {sh_mul7(b0, b1);} \
    133         else \
    134           {sh_mul9(b0, b1)};
    135 
    136 #define sh_pass(a0,a1,b0,b1,c0,c1,mul) \
    137       sh_round(a0,a1,b0,b1,c0,c1,x00,x01,mul); \
    138       sh_round(b0,b1,c0,c1,a0,a1,x10,x11,mul); \
    139       sh_round(c0,c1,a0,a1,b0,b1,x20,x21,mul); \
    140       sh_round(a0,a1,b0,b1,c0,c1,x30,x31,mul); \
    141       sh_round(b0,b1,c0,c1,a0,a1,x40,x41,mul); \
    142       sh_round(c0,c1,a0,a1,b0,b1,x50,x51,mul); \
    143       sh_round(a0,a1,b0,b1,c0,c1,x60,x61,mul); \
    144       sh_round(b0,b1,c0,c1,a0,a1,x70,x71,mul);
    145 
    146 #define sh_key_schedule \
    147       sh_sub64(x00, x01, x70^0xA5A5A5A5, x71^0xA5A5A5A5); \
    148       sh_xor64(x10, x11, x00, x01); \
    149       sh_add64(x20, x21, x10, x11); \
    150       sh_sub64(x30, x31, x20^((~x10)<<19), ~x21^(((x11)<<19)|((x10)>>13))); \
    151       sh_xor64(x40, x41, x30, x31); \
    152       sh_add64(x50, x51, x40, x41); \
    153       sh_sub64(x60, x61, ~x50^(((x40)>>23)|((x41)<<9)), x51^((~x41)>>23)); \
    154       sh_xor64(x70, x71, x60, x61); \
    155       sh_add64(x00, x01, x70, x71); \
    156       sh_sub64(x10, x11, x00^((~x70)<<19), ~x01^(((x71)<<19)|((x70)>>13))); \
    157       sh_xor64(x20, x21, x10, x11); \
    158       sh_add64(x30, x31, x20, x21); \
    159       sh_sub64(x40, x41, ~x30^(((x20)>>23)|((x21)<<9)), x31^((~x21)>>23)); \
    160       sh_xor64(x50, x51, x40, x41); \
    161       sh_add64(x60, x61, x50, x51); \
    162       sh_sub64(x70, x71, x60^0x89ABCDEF, x61^0x01234567);
    163 
    164 #define sh_feedforward \
    165       sh_xor64(a0, a1, aa0, aa1); \
    166       sh_sub64(b0, b1, bb0, bb1); \
    167       sh_add64(c0, c1, cc0, cc1);
    168 
    169 #ifdef UNROLL_COMPRESS
    170 #define sh_compress \
    171       sh_save_abc \
    172       sh_pass(a0,a1,b0,b1,c0,c1,5); \
    173       sh_key_schedule; \
    174       sh_pass(c0,c1,a0,a1,b0,b1,7); \
    175       sh_key_schedule; \
    176       sh_pass(b0,b1,c0,c1,a0,a1,9); \
    177       for(pass_no=3; pass_no<PASSES; pass_no++) { \
    178         sh_key_schedule \
    179         sh_pass(a0,a1,b0,b1,c0,c1,9); \
    180         tmpa=a0; a0=c0; c0=b0; b0=tmpa; \
    181         tmpa=a1; a1=c1; c1=b1; b1=tmpa;} \
    182       sh_feedforward
    183 #else
    184 #define sh_compress \
    185       sh_save_abc \
    186       for(pass_no=0; pass_no<PASSES; pass_no++) { \
    187         if(pass_no != 0) {sh_key_schedule} \
    188         sh_pass(a0,a1,b0,b1,c0,c1,(pass_no==0?5:pass_no==1?7:9)) \
    189         tmpa=a0; a0=c0; c0=b0; b0=tmpa; \
    190         tmpa=a1; a1=c1; c1=b1; b1=tmpa;} \
    191       sh_feedforward
    192 #endif
     125      add64(b0, b1, temp0, temp1);
     126
     127
     128#define round5(a0,a1,b0,b1,c0,c1,x0,x1) \
     129      roundX(a0,a1,b0,b1,c0,c1,x0,x1); \
     130      mul5(b0, b1);
     131
     132#define round7(a0,a1,b0,b1,c0,c1,x0,x1) \
     133      roundX(a0,a1,b0,b1,c0,c1,x0,x1); \
     134      mul7(b0, b1);
     135
     136#define round9(a0,a1,b0,b1,c0,c1,x0,x1) \
     137      roundX(a0,a1,b0,b1,c0,c1,x0,x1); \
     138      mul9(b0, b1);
     139
     140
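     141/* Eight rounds with multiplier 5. The first seven steps of the key schedule
     142 * are interleaved between the rounds; the remaining steps are in key_schedule. */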
     143#define pass5(a0,a1,b0,b1,c0,c1) \
     144      round5(a0,a1,b0,b1,c0,c1,x00,x01); \
     145      sub64(x00, x01, x70^0xA5A5A5A5, x71^0xA5A5A5A5); \
     146      round5(b0,b1,c0,c1,a0,a1,x10,x11); \
     147      xor64(x10, x11, x00, x01); \
     148      round5(c0,c1,a0,a1,b0,b1,x20,x21); \
     149      add64(x20, x21, x10, x11); \
     150      round5(a0,a1,b0,b1,c0,c1,x30,x31); \
     151      sub64(x30, x31, x20^((~x10)<<19), ~x21^(((x11)<<19)|((x10)>>13))); \
     152      round5(b0,b1,c0,c1,a0,a1,x40,x41); \
     153      xor64(x40, x41, x30, x31); \
     154      round5(c0,c1,a0,a1,b0,b1,x50,x51); \
     155      add64(x50, x51, x40, x41); \
     156      round5(a0,a1,b0,b1,c0,c1,x60,x61); \
     157      sub64(x60, x61, ~x50^(((x40)>>23)|((x41)<<9)), x51^((~x41)>>23)); \
     158      round5(b0,b1,c0,c1,a0,a1,x70,x71);
     159
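     160/* Eight rounds with multiplier 7. The first seven steps of the key schedule
     161 * are interleaved between the rounds; the remaining steps are in key_schedule. */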
     162#define pass7(a0,a1,b0,b1,c0,c1) \
     163      round7(a0,a1,b0,b1,c0,c1,x00,x01); \
     164      sub64(x00, x01, x70^0xA5A5A5A5, x71^0xA5A5A5A5); \
     165      round7(b0,b1,c0,c1,a0,a1,x10,x11); \
     166      xor64(x10, x11, x00, x01); \
     167      round7(c0,c1,a0,a1,b0,b1,x20,x21); \
     168      add64(x20, x21, x10, x11); \
     169      round7(a0,a1,b0,b1,c0,c1,x30,x31); \
     170      sub64(x30, x31, x20^((~x10)<<19), ~x21^(((x11)<<19)|((x10)>>13))); \
     171      round7(b0,b1,c0,c1,a0,a1,x40,x41); \
     172      xor64(x40, x41, x30, x31); \
     173      round7(c0,c1,a0,a1,b0,b1,x50,x51); \
     174      add64(x50, x51, x40, x41); \
     175      round7(a0,a1,b0,b1,c0,c1,x60,x61); \
     176      sub64(x60, x61, ~x50^(((x40)>>23)|((x41)<<9)), x51^((~x41)>>23)); \
     177      round7(b0,b1,c0,c1,a0,a1,x70,x71);
     178
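     179/* Eight rounds with multiplier 9. The first seven steps of the key schedule
     180 * are interleaved between the rounds; the remaining steps are in key_schedule. */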
     181#define pass9(a0,a1,b0,b1,c0,c1) \
     182      round9(a0,a1,b0,b1,c0,c1,x00,x01); \
     183      sub64(x00, x01, x70^0xA5A5A5A5, x71^0xA5A5A5A5); \
     184      round9(b0,b1,c0,c1,a0,a1,x10,x11); \
     185      xor64(x10, x11, x00, x01); \
     186      round9(c0,c1,a0,a1,b0,b1,x20,x21); \
     187      add64(x20, x21, x10, x11); \
     188      round9(a0,a1,b0,b1,c0,c1,x30,x31); \
     189      sub64(x30, x31, x20^((~x10)<<19), ~x21^(((x11)<<19)|((x10)>>13))); \
     190      round9(b0,b1,c0,c1,a0,a1,x40,x41); \
     191      xor64(x40, x41, x30, x31); \
     192      round9(c0,c1,a0,a1,b0,b1,x50,x51); \
     193      add64(x50, x51, x40, x41); \
     194      round9(a0,a1,b0,b1,c0,c1,x60,x61); \
     195      sub64(x60, x61, ~x50^(((x40)>>23)|((x41)<<9)), x51^((~x41)>>23)); \
     196      round9(b0,b1,c0,c1,a0,a1,x70,x71);
     197
     198#define key_schedule \
     199      xor64(x70, x71, x60, x61); \
     200      add64(x00, x01, x70, x71); \
     201      sub64(x10, x11, x00^((~x70)<<19), ~x01^(((x71)<<19)|((x70)>>13))); \
     202      xor64(x20, x21, x10, x11); \
     203      add64(x30, x31, x20, x21); \
     204      sub64(x40, x41, ~x30^(((x20)>>23)|((x21)<<9)), x31^((~x21)>>23)); \
     205      xor64(x50, x51, x40, x41); \
     206      add64(x60, x61, x50, x51); \
     207      sub64(x70, x71, x60^0x89ABCDEF, x61^0x01234567);
     208
     209#define feedforward \
     210      xor64(a0, a1, aa0, aa1); \
     211      sub64(b0, b1, bb0, bb1); \
     212      add64(c0, c1, cc0, cc1);
     213
     214#define compress \
     215      pass5(a0,a1,b0,b1,c0,c1); \
     216      key_schedule; \
     217      pass7(c0,c1,a0,a1,b0,b1); \
     218      key_schedule; \
     219      pass9(b0,b1,c0,c1,a0,a1); \
     220      feedforward
    193221
    194222#define tiger_compress_macro(str, state) \
    195223{ \
    196   register sh_word32 a0, a1, b0, b1, c0, c1, tmpa; \
     224  register sh_word32 a0, a1, b0, b1, c0, c1; \
    197225  sh_word32 aa0, aa1, bb0, bb1, cc0, cc1; \
    198226  sh_word32 x00, x01, x10, x11, x20, x21, x30, x31, \
    199          x40, x41, x50, x51, x60, x61, x70, x71; \
    200   register sh_word32 temp0, temp1, tempt0, tempt1, temps0, tcarry; \
    201   int pass_no; \
     227                  x40, x41, x50, x51, x60, x61, x70, x71; \
     228  sh_word32 temp0, temp1, tempt0, tempt1, temps0, tcarry; \
    202229\
    203230  a0 = state[0]; \
     
    208235  c1 = state[5]; \
    209236\
     237      save_abc \
     238\
    210239  x00=str[0*2]; x01=str[0*2+1]; x10=str[1*2]; x11=str[1*2+1]; \
    211240  x20=str[2*2]; x21=str[2*2+1]; x30=str[3*2]; x31=str[3*2+1]; \
     
    213242  x60=str[6*2]; x61=str[6*2+1]; x70=str[7*2]; x71=str[7*2+1]; \
    214243\
    215   sh_compress; \
     244  compress; \
    216245\
    217246  state[0] = a0; \
     
    223252}
    224253
    225 #ifdef UNROLL_COMPRESS
     254#if defined(UNROLL_COMPRESS)
    226255/* The compress function is inlined */
    227256#define tiger_compress(str, state) \
    228257  tiger_compress_macro(((sh_word32*)str), ((sh_word32*)state))
    229 #else
    230 /* The compress function is a function */
     258
     259#else
     260
    231261void
    232262tiger_compress(sh_word32 *str, sh_word32 state[6])
     
    249279#ifdef BIG_ENDIAN
    250280      for(j=0; j<64; j++)
    251         temp[j^3] = ((sh_byte*)str)[j];
    252       tiger_compress(((sh_word32*)temp), res);
    253 #else
    254       tiger_compress(str, res);
     281        temp[j^3] = ((sh_byte*)str)[j];
     282      tiger_compress_macro(((sh_word32*)temp), res);
     283#else
     284      tiger_compress_macro(str, res);
    255285#endif
    256286      str += 16;
     
    258288}
    259289
    260 void
    261 tiger(sh_word32 *str, sh_word32 length, sh_word32 res[6])
     290
     291void tiger(sh_word32 *str, sh_word32 length, sh_word32 res[6])
    262292{
    263293  register sh_word32 i, j;
    264294  sh_byte temp[64];
     295
     296  /*
     297   * res[0]=0x89ABCDEF;
     298   * res[1]=0x01234567;
     299   * res[2]=0x76543210;
     300   * res[3]=0xFEDCBA98;
     301   * res[4]=0xC3B2E187;
     302   * res[5]=0xF096A5B4;
     303   */
    265304
    266305  for(i=length; i>=64; i-=64)
     
    307346}
    308347
    309 #else
    310 void dummy_1 (int a)
    311 {
    312   (void) a;
    313   return;
    314 }
    315 #endif
    316 
    317 
    318 
    319 
    320 
    321 
    322 
    323 
    324 
     348#endif
     349
Note: See TracChangeset for help on using the changeset viewer.