00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 #ifndef __Random123_ars_dot_hpp__
00033 #define __Random123_ars_dot_hpp__
00034 
00035 #include "features/compilerfeatures.h"
00036 #include "array.h"
00037 
00038 #if R123_USE_AES_NI
00039 
/** Round count used by the default ARS generators.  A definition supplied
 *  before this header is included takes precedence over the value below. */
#if !defined(ARS1xm128i_DEFAULT_ROUNDS)
#define ARS1xm128i_DEFAULT_ROUNDS 7
#endif

/** Exposes the default round count as the enumerator ars1xm128i_rounds. */
enum r123_enum_ars1xm128i {
    ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS
};
00046 
00047 
/** Counter type of the ars1xm128i generator. */
typedef struct r123array1xm128i ars1xm128i_ctr_t;
/** Key type of the ars1xm128i generator; the same struct as the counter. */
typedef ars1xm128i_ctr_t ars1xm128i_key_t;
/** User-key type of the ars1xm128i generator; the same struct as the counter. */
typedef ars1xm128i_ctr_t ars1xm128i_ukey_t;
00055 R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
00057 R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
00058     __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B), 
00059                                    R123_64BIT(0x9E3779B97F4A7C15)); 
00060     
00061 
00062 
00063     __m128i kk = k.v[0].m;
00064     __m128i v = _mm_xor_si128(in.v[0].m, kk);
00065     ars1xm128i_ctr_t ret;
00066     R123_ASSERT(Nrounds<=10);
00067     if( Nrounds>1 ){
00068         kk = _mm_add_epi64(kk, kweyl);
00069         v = _mm_aesenc_si128(v, kk);
00070     }
00071     if( Nrounds>2 ){
00072         kk = _mm_add_epi64(kk, kweyl);
00073         v = _mm_aesenc_si128(v, kk);
00074     }
00075     if( Nrounds>3 ){
00076         kk = _mm_add_epi64(kk, kweyl);
00077         v = _mm_aesenc_si128(v, kk);
00078     }
00079     if( Nrounds>4 ){
00080         kk = _mm_add_epi64(kk, kweyl);
00081         v = _mm_aesenc_si128(v, kk);
00082     }
00083     if( Nrounds>5 ){
00084         kk = _mm_add_epi64(kk, kweyl);
00085         v = _mm_aesenc_si128(v, kk);
00086     }
00087     if( Nrounds>6 ){
00088         kk = _mm_add_epi64(kk, kweyl);
00089         v = _mm_aesenc_si128(v, kk);
00090     }
00091     if( Nrounds>7 ){
00092         kk = _mm_add_epi64(kk, kweyl);
00093         v = _mm_aesenc_si128(v, kk);
00094     }
00095     if( Nrounds>8 ){
00096         kk = _mm_add_epi64(kk, kweyl);
00097         v = _mm_aesenc_si128(v, kk);
00098     }
00099     if( Nrounds>9 ){
00100         kk = _mm_add_epi64(kk, kweyl);
00101         v = _mm_aesenc_si128(v, kk);
00102     }
00103     kk = _mm_add_epi64(kk, kweyl);
00104     v = _mm_aesenclast_si128(v, kk);
00105     ret.v[0].m = v;
00106     return ret;
00107 }
00108 
/** ars1xm128i(c,k): ars1xm128i_R with the default round count
 *  ars1xm128i_rounds. */
#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, (c), (k))
00113 
00115 typedef struct r123array4x32 ars4x32_ctr_t;
00117 typedef struct r123array4x32 ars4x32_key_t;
00119 typedef struct r123array4x32 ars4x32_ukey_t;
00121 enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
00123 R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
00125 R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
00126     ars1xm128i_ctr_t c128;
00127     ars1xm128i_key_t k128;
00128     c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
00129     k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
00130     c128 = ars1xm128i_R(Nrounds, c128, k128);
00131     _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
00132     return c;
00133 }
00134 
/** ars4x32(c,k): ars4x32_R with the default round count ars4x32_rounds. */
#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, (c), (k))
00139 
00140 #ifdef __cplusplus
00141 namespace r123{
00163 template<unsigned int ROUNDS>
00164 struct ARS1xm128i_R{
00165     typedef ars1xm128i_ctr_t ctr_type;
00166     typedef ars1xm128i_key_t key_type;
00167     typedef ars1xm128i_key_t ukey_type;
00168     static const unsigned int rounds=ROUNDS;
00169     R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00170         return ars1xm128i_R(ROUNDS, ctr, key);
00171     }
00172 };
00173 
00178 template<unsigned int ROUNDS>
00179 struct ARS4x32_R{
00180     typedef ars4x32_ctr_t ctr_type;
00181     typedef ars4x32_key_t key_type;
00182     typedef ars4x32_key_t ukey_type;
00183     static const unsigned int rounds=ROUNDS;
00184     R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00185         return ars4x32_R(ROUNDS, ctr, key);
00186     }
00187 };
00196 typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
00197 typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
00198 } 
00199 
00200 #endif 
00201 
00202 #endif 
00203 
00204 #endif