00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 #ifndef _threefry_dot_h_
00033 #define _threefry_dot_h_
00034 #include "features/compilerfeatures.h"
00035 #include "array.h"
00036 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 
00064 
00065 
/*
 * Left-rotation amounts for the four-word Threefry rounds on 64-bit words.
 *
 * R_64x4_<r>_<j> is the rotate count handed to RotL_64 by the j-th mix
 * statement (j = 0 or 1) of round r.  The unrolled round code in
 * _threefry4x_tpl starts over at row 0 every eight rounds.
 */
enum r123_enum_threefry64x4 {
    R_64x4_0_0 = 14,
    R_64x4_0_1 = 16,

    R_64x4_1_0 = 52,
    R_64x4_1_1 = 57,

    R_64x4_2_0 = 23,
    R_64x4_2_1 = 40,

    R_64x4_3_0 = 5,
    R_64x4_3_1 = 37,

    R_64x4_4_0 = 25,
    R_64x4_4_1 = 33,

    R_64x4_5_0 = 46,
    R_64x4_5_1 = 12,

    R_64x4_6_0 = 58,
    R_64x4_6_1 = 22,

    R_64x4_7_0 = 32,
    R_64x4_7_1 = 32
};
00078 
/*
 * Left-rotation amounts for the two-word Threefry rounds on 64-bit words.
 *
 * R_64x2_<r>_0 is the rotate count handed to RotL_64 in round r; the
 * unrolled round code in _threefry2x_tpl starts over at row 0 every
 * eight rounds.
 */
enum r123_enum_threefry64x2 {
    R_64x2_0_0 = 16,
    R_64x2_1_0 = 42,
    R_64x2_2_0 = 12,
    R_64x2_3_0 = 31,
    R_64x2_4_0 = 16,
    R_64x2_5_0 = 32,
    R_64x2_6_0 = 24,
    R_64x2_7_0 = 21
};
00103 
/*
 * Left-rotation amounts for the four-word Threefry rounds on 32-bit words.
 *
 * R_32x4_<r>_<j> is the rotate count handed to RotL_32 by the j-th mix
 * statement (j = 0 or 1) of round r.  The unrolled round code in
 * _threefry4x_tpl starts over at row 0 every eight rounds.
 */
enum r123_enum_threefry32x4 {
    R_32x4_0_0 = 10,
    R_32x4_0_1 = 26,

    R_32x4_1_0 = 11,
    R_32x4_1_1 = 21,

    R_32x4_2_0 = 13,
    R_32x4_2_1 = 27,

    R_32x4_3_0 = 23,
    R_32x4_3_1 = 5,

    R_32x4_4_0 = 6,
    R_32x4_4_1 = 20,

    R_32x4_5_0 = 17,
    R_32x4_5_1 = 11,

    R_32x4_6_0 = 25,
    R_32x4_6_1 = 10,

    R_32x4_7_0 = 18,
    R_32x4_7_1 = 20
};
00129 
/*
 * Left-rotation amounts for the two-word Threefry rounds on 32-bit words.
 *
 * R_32x2_<r>_0 is the rotate count handed to RotL_32 in round r; the
 * unrolled round code in _threefry2x_tpl starts over at row 0 every
 * eight rounds.
 */
enum r123_enum_threefry32x2 {
    R_32x2_0_0 = 13,
    R_32x2_1_0 = 15,
    R_32x2_2_0 = 26,
    R_32x2_3_0 = 6,
    R_32x2_4_0 = 17,
    R_32x2_5_0 = 29,
    R_32x2_6_0 = 16,
    R_32x2_7_0 = 24
};
00153 
/*
 * Named word counts for the two supported block sizes.
 * NOTE(review): presumably the number of words per counter block; the
 * enumerators are not referenced in the visible part of this header.
 */
enum r123_enum_threefry_wcnt {
    WCNT2 = 2,
    WCNT4 = 4
};
00158 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N));
00159 R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N)
00160 {
00161     return (x << (N & 63)) | (x >> ((64-N) & 63));
00162 }
00163     
00164 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N));
00165 R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N)
00166 {
00167     return (x << (N & 31)) | (x >> ((32-N) & 31));
00168 }
00169 
/* Assemble a 64-bit constant from its upper and lower 32-bit halves. */
#define SKEIN_MK_64(hi32, lo32)   ((((uint64_t) (hi32)) << 32) + (lo32))

/*
 * Key-schedule parity constants.  The round templates paste the word
 * width onto SKEIN_KS_PARITY and XOR the result with every key word to
 * obtain the extra key-schedule word.
 */
#define SKEIN_KS_PARITY64         SKEIN_MK_64(0x1BD11BDA, 0xA9FC1A22)
#define SKEIN_KS_PARITY32         0x1BD11BDA
00173 
/*
 * Default round counts, one per variant.  Each value can be overridden
 * by defining the macro before this header is included.  The 2x
 * template stores its value in the threefry2x<W>_rounds enumerator that
 * the threefry2x<W>() convenience wrapper passes to threefry2x<W>_R().
 * NOTE(review): the 4x values are presumably consumed the same way by
 * the 4x template - confirm against the rest of the header.
 */
#if !defined(THREEFRY2x32_DEFAULT_ROUNDS)
#  define THREEFRY2x32_DEFAULT_ROUNDS 20
#endif

#if !defined(THREEFRY2x64_DEFAULT_ROUNDS)
#  define THREEFRY2x64_DEFAULT_ROUNDS 20
#endif

#if !defined(THREEFRY4x32_DEFAULT_ROUNDS)
#  define THREEFRY4x32_DEFAULT_ROUNDS 20
#endif

#if !defined(THREEFRY4x64_DEFAULT_ROUNDS)
#  define THREEFRY4x64_DEFAULT_ROUNDS 20
#endif
00189 
/*
 * _threefry2x_tpl(W) generates the two-word Threefry interface for word
 * width W:
 *
 *   threefry2x<W>_ctr_t, _key_t, _ukey_t   aliases of struct r123array2x<W>
 *   threefry2x<W>keyinit(uk)               returns the user key unchanged
 *   threefry2x<W>_R(Nrounds, in, k)        applies Nrounds rounds (at most 32,
 *                                          checked with R123_ASSERT) to the
 *                                          counter `in` under key `k`
 *   threefry2x<W>_rounds                   enumerator holding
 *                                          THREEFRY2x<W>_DEFAULT_ROUNDS
 *   threefry2x<W>(in, k)                   threefry2x<W>_R with that default
 *
 * Key schedule: ks[0] and ks[1] are the key words and ks[2] is
 * SKEIN_KS_PARITY<W> XORed with both of them.
 *
 * Round n (n = 0, 1, ...) is
 *     X0 += X1;  X1 = RotL_<W>(X1, R_<W>x2_<n mod 8>_0) ^ X0;
 * and after every fourth round, key injection s (s = 1, 2, ...) adds
 * ks[s mod 3] to X0 and ks[(s + 1) mod 3] followed by s to X1.  Before
 * the first round ks[0] and ks[1] are added to the counter words.
 *
 * Every round is written out separately behind its own test on Nrounds
 * (presumably so that a compile-time constant round count lets the
 * compiler discard the tests - not verifiable from this header alone).
 */
#define _threefry2x_tpl(W)                                              \
typedef struct r123array2x##W threefry2x##W##_ctr_t;                    \
typedef struct r123array2x##W threefry2x##W##_key_t;                    \
typedef struct r123array2x##W threefry2x##W##_ukey_t;                   \
                                                                        \
R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) \
{                                                                       \
    return uk;                                                          \
}                                                                       \
                                                                        \
R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k) \
{                                                                       \
    threefry2x##W##_ctr_t X;                                            \
    uint##W##_t ks[3];                                                  \
                                                                        \
    R123_ASSERT(Nrounds<=32);                                           \
                                                                        \
    /* Key schedule: the two key words and their parity word. */        \
    ks[0] = k.v[0];                                                     \
    ks[1] = k.v[1];                                                     \
    ks[2] = SKEIN_KS_PARITY##W;                                         \
    ks[2] ^= k.v[0];                                                    \
    ks[2] ^= k.v[1];                                                    \
                                                                        \
    /* Load the counter and apply the initial key injection. */         \
    X.v[0] = in.v[0];                                                   \
    X.v[1] = in.v[1];                                                   \
    X.v[0] += ks[0];                                                    \
    X.v[1] += ks[1];                                                    \
                                                                        \
    /* Rounds 1-4, then key injection 1. */                             \
    if (Nrounds > 0) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_0_0) ^ X.v[0]; } \
    if (Nrounds > 1) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_1_0) ^ X.v[0]; } \
    if (Nrounds > 2) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_2_0) ^ X.v[0]; } \
    if (Nrounds > 3) {                                                  \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_3_0) ^ X.v[0]; \
        X.v[0] += ks[1];                                                \
        X.v[1] += ks[2];                                                \
        X.v[1] += 1;                                                    \
    }                                                                   \
                                                                        \
    /* Rounds 5-8, then key injection 2. */                             \
    if (Nrounds > 4) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_4_0) ^ X.v[0]; } \
    if (Nrounds > 5) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_5_0) ^ X.v[0]; } \
    if (Nrounds > 6) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_6_0) ^ X.v[0]; } \
    if (Nrounds > 7) {                                                  \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_7_0) ^ X.v[0]; \
        X.v[0] += ks[2];                                                \
        X.v[1] += ks[0];                                                \
        X.v[1] += 2;                                                    \
    }                                                                   \
                                                                        \
    /* Rounds 9-12, then key injection 3. */                            \
    if (Nrounds > 8) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_0_0) ^ X.v[0]; } \
    if (Nrounds > 9) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_1_0) ^ X.v[0]; } \
    if (Nrounds > 10) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_2_0) ^ X.v[0]; } \
    if (Nrounds > 11) {                                                 \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_3_0) ^ X.v[0]; \
        X.v[0] += ks[0];                                                \
        X.v[1] += ks[1];                                                \
        X.v[1] += 3;                                                    \
    }                                                                   \
                                                                        \
    /* Rounds 13-16, then key injection 4. */                           \
    if (Nrounds > 12) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_4_0) ^ X.v[0]; } \
    if (Nrounds > 13) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_5_0) ^ X.v[0]; } \
    if (Nrounds > 14) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_6_0) ^ X.v[0]; } \
    if (Nrounds > 15) {                                                 \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_7_0) ^ X.v[0]; \
        X.v[0] += ks[1];                                                \
        X.v[1] += ks[2];                                                \
        X.v[1] += 4;                                                    \
    }                                                                   \
                                                                        \
    /* Rounds 17-20, then key injection 5. */                           \
    if (Nrounds > 16) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_0_0) ^ X.v[0]; } \
    if (Nrounds > 17) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_1_0) ^ X.v[0]; } \
    if (Nrounds > 18) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_2_0) ^ X.v[0]; } \
    if (Nrounds > 19) {                                                 \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_3_0) ^ X.v[0]; \
        X.v[0] += ks[2];                                                \
        X.v[1] += ks[0];                                                \
        X.v[1] += 5;                                                    \
    }                                                                   \
                                                                        \
    /* Rounds 21-24, then key injection 6. */                           \
    if (Nrounds > 20) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_4_0) ^ X.v[0]; } \
    if (Nrounds > 21) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_5_0) ^ X.v[0]; } \
    if (Nrounds > 22) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_6_0) ^ X.v[0]; } \
    if (Nrounds > 23) {                                                 \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_7_0) ^ X.v[0]; \
        X.v[0] += ks[0];                                                \
        X.v[1] += ks[1];                                                \
        X.v[1] += 6;                                                    \
    }                                                                   \
                                                                        \
    /* Rounds 25-28, then key injection 7. */                           \
    if (Nrounds > 24) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_0_0) ^ X.v[0]; } \
    if (Nrounds > 25) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_1_0) ^ X.v[0]; } \
    if (Nrounds > 26) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_2_0) ^ X.v[0]; } \
    if (Nrounds > 27) {                                                 \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_3_0) ^ X.v[0]; \
        X.v[0] += ks[1];                                                \
        X.v[1] += ks[2];                                                \
        X.v[1] += 7;                                                    \
    }                                                                   \
                                                                        \
    /* Rounds 29-32, then key injection 8. */                           \
    if (Nrounds > 28) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_4_0) ^ X.v[0]; } \
    if (Nrounds > 29) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_5_0) ^ X.v[0]; } \
    if (Nrounds > 30) { X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_6_0) ^ X.v[0]; } \
    if (Nrounds > 31) {                                                 \
        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_7_0) ^ X.v[0]; \
        X.v[0] += ks[2];                                                \
        X.v[1] += ks[0];                                                \
        X.v[1] += 8;                                                    \
    }                                                                   \
                                                                        \
    return X;                                                           \
}                                                                       \
                                                                        \
enum r123_enum_threefry2x##W { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS }; \
                                                                        \
R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k) \
{                                                                       \
    return threefry2x##W##_R(threefry2x##W##_rounds, in, k);            \
}
00294 
00295 
00296 #define _threefry4x_tpl(W)                                              \
00297 typedef struct r123array4x##W threefry4x##W##_ctr_t;                        \
00298 typedef struct r123array4x##W threefry4x##W##_key_t;                        \
00299 typedef struct r123array4x##W threefry4x##W##_ukey_t;                        \
00300 R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \
00301 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
00302 R123_CUDA_DEVICE R123_STATIC_INLINE                                          \
00303 threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
00304     threefry4x##W##_ctr_t X;                                            \
00305     uint##W##_t ks[4+1];                                            \
00306     int  i;                    \
00307     R123_ASSERT(Nrounds<=72);                                           \
00308     ks[4] =  SKEIN_KS_PARITY##W;                                    \
00309     for (i=0;i < 4; i++)                                            \
00310         {                                                               \
00311             ks[i] = k.v[i];                                             \
00312             X.v[i]  = in.v[i];                                          \
00313             ks[4] ^= k.v[i];                                        \
00314         }                                                               \
00315                                                                         \
00316                                  \
00317     X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
00318                                                                         \
00319     if(Nrounds>0){                                                      \
00320         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00321         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00322     }                                                                   \
00323     if(Nrounds>1){                                                      \
00324         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00325         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00326     }                                                                   \
00327     if(Nrounds>2){                                                      \
00328         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00329         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00330     }                                                                   \
00331     if(Nrounds>3){                                                      \
00332         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00333         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00334     }                                                                   \
00335     if(Nrounds>3){                                                      \
00336                                                     \
00337         X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
00338         X.v[4-1] += 1;                      \
00339     }                                                                   \
00340                                                                         \
00341     if(Nrounds>4){                                                      \
00342         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00343         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00344     }                                                                   \
00345     if(Nrounds>5){                                                      \
00346         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00347         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00348     }                                                                   \
00349     if(Nrounds>6){                                                      \
00350         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00351         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00352     }                                                                   \
00353     if(Nrounds>7){                                                      \
00354         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00355         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00356     }                                                                   \
00357     if(Nrounds>7){                                                      \
00358                                                     \
00359         X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
00360         X.v[4-1] += 2;                      \
00361     }                                                                   \
00362                                                                         \
00363     if(Nrounds>8){                                                      \
00364         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00365         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00366     }                                                                   \
00367     if(Nrounds>9){                                                      \
00368         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00369         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00370     }                                                                   \
00371     if(Nrounds>10){                                                     \
00372         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00373         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00374     }                                                                   \
00375     if(Nrounds>11){                                                     \
00376         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00377         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00378     }                                                                   \
00379     if(Nrounds>11){                                                     \
00380                                                     \
00381         X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
00382         X.v[4-1] += 3;                      \
00383     }                                                                   \
00384                                                                         \
00385     if(Nrounds>12){                                                     \
00386         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00387         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00388     }                                                                   \
00389     if(Nrounds>13){                                                     \
00390         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00391         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00392     }                                                                   \
00393     if(Nrounds>14){                                                     \
00394         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00395         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00396     }                                                                   \
00397     if(Nrounds>15){                                                     \
00398         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00399         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00400     }                                                                   \
00401     if(Nrounds>15){                                                     \
00402                                                     \
00403         X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
00404         X.v[4-1] += 4;                      \
00405     }                                                                   \
00406                                                                         \
00407     if(Nrounds>16){                                                     \
00408         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00409         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00410     }                                                                   \
00411     if(Nrounds>17){                                                     \
00412         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00413         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00414     }                                                                   \
00415     if(Nrounds>18){                                                     \
00416         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00417         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00418     }                                                                   \
00419     if(Nrounds>19){                                                     \
00420         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00421         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00422     }                                                                   \
00423     if(Nrounds>19){                                                     \
00424                                                     \
00425         X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
00426         X.v[4-1] += 5;                      \
00427     }                                                                   \
00428                                                                         \
00429     if(Nrounds>20){                                                     \
00430         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00431         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00432     }                                                                   \
00433     if(Nrounds>21){                                                     \
00434         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00435         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00436     }                                                                   \
00437     if(Nrounds>22){                                                     \
00438         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00439         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00440     }                                                                   \
00441     if(Nrounds>23){                                                     \
00442         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00443         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00444     }                                                                   \
00445     if(Nrounds>23){                                                     \
00446                                                     \
00447         X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
00448         X.v[4-1] += 6;                      \
00449     }                                                                   \
00450                                                                         \
00451     if(Nrounds>24){                                                     \
00452         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00453         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00454     }                                                                   \
00455     if(Nrounds>25){                                                     \
00456         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00457         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00458     }                                                                   \
00459     if(Nrounds>26){                                                     \
00460         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00461         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00462     }                                                                   \
00463     if(Nrounds>27){                                                     \
00464         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00465         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00466     }                                                                   \
00467     if(Nrounds>27){                                                     \
00468                                                     \
00469         X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
00470         X.v[4-1] += 7;                      \
00471     }                                                                   \
00472                                                                         \
00473     if(Nrounds>28){                                                     \
00474         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00475         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00476     }                                                                   \
00477     if(Nrounds>29){                                                     \
00478         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00479         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00480     }                                                                   \
00481     if(Nrounds>30){                                                     \
00482         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00483         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00484     }                                                                   \
00485     if(Nrounds>31){                                                     \
00486         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00487         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00488     }                                                                   \
00489     if(Nrounds>31){                                                     \
00490                                                     \
00491         X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
00492         X.v[4-1] += 8;                      \
00493     }                                                                   \
00494                                                                         \
00495     if(Nrounds>32){                                                     \
00496         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00497         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00498     }                                                                   \
00499     if(Nrounds>33){                                                     \
00500         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00501         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00502     }                                                                   \
00503     if(Nrounds>34){                                                     \
00504         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00505         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00506     }                                                                   \
00507     if(Nrounds>35){                                                     \
00508         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00509         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00510     }                                                                   \
00511     if(Nrounds>35){                                                     \
00512                                                     \
00513         X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
00514         X.v[4-1] += 9;                      \
00515     }                                                                   \
00516                                                                         \
00517     if(Nrounds>36){                                                     \
00518         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00519         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00520     }                                                                   \
00521     if(Nrounds>37){                                                     \
00522         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00523         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00524     }                                                                   \
00525     if(Nrounds>38){                                                     \
00526         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00527         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00528     }                                                                   \
00529     if(Nrounds>39){                                                     \
00530         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00531         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00532     }                                                                   \
00533     if(Nrounds>39){                                                     \
00534                                                     \
00535         X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
00536         X.v[4-1] += 10;                      \
00537     }                                                                   \
00538                                                                         \
00539     if(Nrounds>40){                                                     \
00540         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00541         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00542     }                                                                   \
00543     if(Nrounds>41){                                                     \
00544         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00545         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00546     }                                                                   \
00547     if(Nrounds>42){                                                     \
00548         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00549         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00550     }                                                                   \
00551     if(Nrounds>43){                                                     \
00552         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00553         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00554     }                                                                   \
00555     if(Nrounds>43){                                                     \
00556                                                     \
00557         X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
00558         X.v[4-1] += 11;                     \
00559     }                                                                   \
00560                                                                         \
00561     if(Nrounds>44){                                                     \
00562         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00563         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00564     }                                                                   \
00565     if(Nrounds>45){                                                     \
00566         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00567         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00568     }                                                                   \
00569     if(Nrounds>46){                                                     \
00570         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00571         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00572     }                                                                   \
00573     if(Nrounds>47){                                                     \
00574         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00575         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00576     }                                                                   \
00577     if(Nrounds>47){                                                     \
00578                                                     \
00579         X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
00580         X.v[4-1] += 12;                      \
00581     }                                                                   \
00582                                                                         \
00583     if(Nrounds>48){                                                     \
00584         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00585         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00586     }                                                                   \
00587     if(Nrounds>49){                                                     \
00588         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00589         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00590     }                                                                   \
00591     if(Nrounds>50){                                                     \
00592         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00593         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00594     }                                                                   \
00595     if(Nrounds>51){                                                     \
00596         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00597         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00598     }                                                                   \
00599     if(Nrounds>51){                                                     \
00600                                                     \
00601         X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
00602         X.v[4-1] += 13;                      \
00603     }                                                                   \
00604                                                                         \
00605     if(Nrounds>52){                                                     \
00606         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00607         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00608     }                                                                   \
00609     if(Nrounds>53){                                                     \
00610         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00611         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00612     }                                                                   \
00613     if(Nrounds>54){                                                     \
00614         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00615         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00616     }                                                                   \
00617     if(Nrounds>55){                                                     \
00618         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00619         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00620     }                                                                   \
00621     if(Nrounds>55){                                                     \
00622                                                     \
00623         X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
00624         X.v[4-1] += 14;                      \
00625     }                                                                   \
00626                                                                         \
00627     if(Nrounds>56){                                                     \
00628         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00629         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00630     }                                                                   \
00631     if(Nrounds>57){                                                     \
00632         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00633         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00634     }                                                                   \
00635     if(Nrounds>58){                                                     \
00636         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00637         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00638     }                                                                   \
00639     if(Nrounds>59){                                                     \
00640         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00641         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00642     }                                                                   \
00643     if(Nrounds>59){                                                     \
00644                                                     \
00645         X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
00646         X.v[4-1] += 15;                      \
00647     }                                                                   \
00648                                                                         \
00649     if(Nrounds>60){                                                     \
00650         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00651         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00652     }                                                                   \
00653     if(Nrounds>61){                                                     \
00654         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00655         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00656     }                                                                   \
00657     if(Nrounds>62){                                                     \
00658         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00659         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00660     }                                                                   \
00661     if(Nrounds>63){                                                     \
00662         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00663         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00664     }                                                                   \
00665     if(Nrounds>63){                                                     \
00666                                                     \
00667         X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
00668         X.v[4-1] += 16;                      \
00669     }                                                                   \
00670                                                                         \
00671     if(Nrounds>64){                                                     \
00672         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
00673         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
00674     }                                                                   \
00675     if(Nrounds>65){                                                     \
00676         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
00677         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
00678     }                                                                   \
00679     if(Nrounds>66){                                                     \
00680         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
00681         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
00682     }                                                                   \
00683     if(Nrounds>67){                                                     \
00684         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
00685         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
00686     }                                                                   \
00687     if(Nrounds>67){                                                     \
00688                                                     \
00689         X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
00690         X.v[4-1] += 17;                      \
00691     }                                                                   \
00692                                                                         \
00693     if(Nrounds>68){                                                     \
00694         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
00695         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
00696     }                                                                   \
00697     if(Nrounds>69){                                                     \
00698         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
00699         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
00700     }                                                                   \
00701     if(Nrounds>70){                                                     \
00702         X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
00703         X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
00704     }                                                                   \
00705     if(Nrounds>71){                                                     \
00706         X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
00707         X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
00708     }                                                                   \
00709     if(Nrounds>71){                                                     \
00710                                                     \
00711         X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
00712         X.v[4-1] += 18;                      \
00713     }                                                                   \
00714                                                                         \
00715     return X;                                                           \
00716 }                                                                       \
00717                                             \
00718 enum r123_enum_threefry4x##W { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS };       \
00719 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
00720 R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
00721 threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
00722     return threefry4x##W##_R(threefry4x##W##_rounds, in, k);            \
00723 }
00724 
/* Instantiate the generator macros for 64-bit and 32-bit words.
 * The macro body that ends just above (presumably _threefry4x_tpl) finishes
 * by defining threefry4x<W>_R, the threefry4x<W>_rounds enumerator and a
 * static inline threefry4x<W>(in, k) wrapper.
 * NOTE(review): _threefry2x_tpl is defined earlier in the file and is assumed
 * to emit the matching threefry2x<W> names - confirm against its definition. */
00726 _threefry2x_tpl(64)
00727 _threefry2x_tpl(32)
00728 _threefry4x_tpl(64)
00729 _threefry4x_tpl(32)
00730 
00731 
00732 
/* Call-style shorthands: from here on, threefryNxW(ctr, key) expands to
 * threefryNxW_R(threefryNxW_rounds, ctr, key), i.e. the _R function is
 * invoked with that variant's threefryNxW_rounds constant as its first
 * argument.  The arguments are substituted verbatim (no extra parentheses). */
00733 #define threefry2x32(ctr, key) threefry2x32_R(threefry2x32_rounds, ctr, key)
00734 #define threefry4x32(ctr, key) threefry4x32_R(threefry4x32_rounds, ctr, key)
00735 #define threefry2x64(ctr, key) threefry2x64_R(threefry2x64_rounds, ctr, key)
00736 #define threefry4x64(ctr, key) threefry4x64_R(threefry4x64_rounds, ctr, key)
00737 
00738 #ifdef __cplusplus
00739 
/* C++ only: generate, inside namespace r123, a class template
 * Threefry<NxW>_R<R> wrapping the C function threefry<NxW>_R.
 *   - ctr_type / key_type are the C counter and key types for this variant;
 *     ukey_type is the same type as key_type.
 *   - rounds exposes the template argument R.
 *   - operator()(ctr, key) rejects R > 72 at compile time and otherwise
 *     returns threefry<NxW>_R(R, ctr, key).
 * Also emits the typedef Threefry<NxW>, which is Threefry<NxW>_R
 * instantiated with threefry<NxW>_rounds.
 * Only block comments may be used on continued lines; the macro's final line
 * is the closing brace of the namespace. */
00740 #define _threefryNxWclass_tpl(NxW)                                      \
00741 namespace r123 {                                                        \
00742 template <unsigned int R>                                               \
00743 struct Threefry##NxW##_R {                                              \
00744     typedef threefry##NxW##_ctr_t ctr_type;                             \
00745     typedef threefry##NxW##_key_t key_type;                             \
00746     typedef key_type ukey_type;                                         \
00747     static const unsigned int rounds = R;                               \
00748     inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)) { \
00749         R123_STATIC_ASSERT(R<=72, "threefry is only unrolled up to 72 rounds\n"); \
00750         return threefry##NxW##_R(R, ctr, key);                          \
00751     }                                                                   \
00752 };                                                                      \
00753 typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW;        \
00754 } /* namespace r123 */
00755 
/* Emit the C++ wrappers for all four variants.  Each line expands to the
 * class template r123::Threefry<NxW>_R<R> plus the typedef
 * r123::Threefry<NxW>, which uses threefry<NxW>_rounds as R.  The argument
 * (e.g. 2x32) is pasted onto the surrounding identifiers with ##. */
00758 _threefryNxWclass_tpl(2x32)
00759 _threefryNxWclass_tpl(4x32)
00760 _threefryNxWclass_tpl(2x64)
00761 _threefryNxWclass_tpl(4x64)
00762 
00763 
00764 
00765 
00862 #endif
00863 
00864 #endif