00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef __MMX__MODULAR_SIMD__HPP
00014 #define __MMX__MODULAR_SIMD__HPP
00015 #include <numerix/modular_int.hpp>
00016 #include <algebramix/vector_sse.hpp>
00017 #if defined(NUMERIX_ENABLE_SIMD) && defined(ALGEBRAMIX_ENABLE_SIMD)
00018 #include <algebramix/vector_modular.hpp>
00019
00020 namespace mmx {
00021 #define Modulus modulus<C,V>
00022 #define Modular modular<Modulus,W>
00023
00024
00025
00026
00027
00028 template<bool b, typename C, typename V, typename W>
00029 struct modular_align_helper {
00030 static const nat m = 1;
00031 static const nat log_m = int_bitsize_helper<nat, m>::value - 1;
00032 static const nat hi_mask_m = ((nat) -1) << log_m;
00033 static const nat lo_mask_m = m - 1;
00034 static const intptr_t len = m * sizeof (C);
00035 static const intptr_t log_len= int_bitsize_helper<nat, len>::value - 1;
00036 static const intptr_t lo_mask_len= len - 1;
00037 };
00038
00039 template<typename C, typename V, typename W>
00040 struct modular_align_helper<true,C,V,W> {
00041 static const nat m = Simd_size(C);
00042 static const nat log_m = int_bitsize_helper<nat, m>::value - 1;
00043 static const nat hi_mask_m = ((nat) -1) << log_m;
00044 static const nat lo_mask_m = m - 1;
00045 static const intptr_t len = m * sizeof (C);
00046 static const intptr_t log_len= int_bitsize_helper<nat, len>::value - 1;
00047 static const intptr_t lo_mask_len= len - 1;
00048 };
00049
00050 template<typename C, typename V, typename W>
00051 struct align_helper<Modular>:
00052 modular_align_helper<sizeof(Modular) == sizeof(C),C,V,W> {};
00053
00054
00055
00056
00057
00058 template<typename C>
00059 struct simd_modulus_add_helper {
00060 typedef typename Simd_base_type(C) U;
00061 C p, p_minus_1;
00062 simd_modulus_add_helper (const U& p2) {
00063 p= simd_set_duplicate_helper<C>::op (p2);
00064 p_minus_1= simd_set_duplicate_helper<C>::op ((U) (p2 - 1)); }
00065 };
00066
00067 template<typename C>
00068 struct simd_modulus_mul_helper {
00069 typedef typename Simd_base_type(C) U;
00070 typedef typename stdint_of_helper<typename
00071 unsigned_int_with_double_size_helper<U>::type>::type L;
00072 typedef typename Simd_type(L) D;
00073 int s,t;
00074 C p;
00075 C q;
00076 C p_minus_1;
00077 C zero;
00078 C lo_mask;
00079 D p_D;
00080 D q_D;
00081 D p_minus_1_D;
00082 D lo_mask_D;
00083 simd_modulus_mul_helper (const U& p2, const U& q2, nat s2, nat t2) {
00084 s= s2;
00085 t= t2;
00086 p= simd_set_duplicate ((U) p2);
00087 q= simd_set_duplicate ((U) q2);
00088 p_minus_1= simd_set_duplicate_helper<C>::op ((U) (p2 - 1));
00089 zero= simd_set_duplicate_helper<C>::op ((U) 0);
00090 lo_mask= simd_set_duplicate_helper<C>
00091 ::op (((U) (-1ull)) >> (4*sizeof(U)));
00092 p_D= simd_set_duplicate_helper<D>::op ((L) p2);
00093 q_D= simd_set_duplicate_helper<D>::op ((L) q2);
00094 p_minus_1_D= simd_set_duplicate_helper<D>::op ((L) (p2 - 1));
00095 lo_mask_D= simd_set_duplicate_helper<D>
00096 ::op (((L) (-1ull)) >> (8*sizeof(U))); }
00097 };
00098
00099
00100
00101
00102
00103 #ifdef __SSE2__
00104 template<typename VV, typename VW,
00105 typename C, typename V, typename W>
00106 struct vec_unary_simd_helper <true,VV,VW,id_op,Modular> {
00107 static inline void op (Modular* dest, const Modular* s, nat n) {
00108 typedef typename Simd_type (C) simd_C;
00109 typedef implementation<vector_abstractions,VW> SVec;
00110 static const nat m= Simd_size (C);
00111 SVec::template vec_unary<id_op, simd_C, simd_C>
00112 ((simd_C*) dest, (const simd_C*) s, n / m); }
00113 };
00114
00115 template<typename V, typename W>
00116 struct sse_has_helper<id_op, modular<modulus<int8_t,V>,W> > {
00117 static const bool value= sizeof(modular<modulus<int8_t,V>,W>)
00118 == sizeof(int8_t); };
00119 template<typename V, typename W>
00120 struct sse_has_helper<id_op, modular<modulus<uint8_t,V>,W> > {
00121 static const bool value= sizeof(modular<modulus<uint8_t,V>,W>)
00122 == sizeof(uint8_t); };
00123 template<typename V, typename W>
00124 struct sse_has_helper<id_op, modular<modulus<int16_t,V>,W> > {
00125 static const bool value= sizeof(modular<modulus<int16_t,V>,W>)
00126 == sizeof(int16_t); };
00127 template<typename V, typename W>
00128 struct sse_has_helper<id_op, modular<modulus<uint16_t,V>,W> > {
00129 static const bool value= sizeof(modular<modulus<uint16_t,V>,W>)
00130 == sizeof(uint16_t); };
00131 template<typename V, typename W>
00132 struct sse_has_helper<id_op, modular<modulus<int32_t,V>,W> > {
00133 static const bool value= sizeof(modular<modulus<int32_t,V>,W>)
00134 == sizeof(int32_t); };
00135 template<typename V, typename W>
00136 struct sse_has_helper<id_op, modular<modulus<uint32_t,V>,W> > {
00137 static const bool value= sizeof(modular<modulus<uint32_t,V>,W>)
00138 == sizeof(uint32_t); };
00139 template<typename V, typename W>
00140 struct sse_has_helper<id_op, modular<modulus<int64_t,V>,W> > {
00141 static const bool value= sizeof(modular<modulus<int64_t,V>,W>)
00142 == sizeof(int64_t); };
00143 template<typename V, typename W>
00144 struct sse_has_helper<id_op, modular<modulus<uint64_t,V>,W> > {
00145 static const bool value= sizeof(modular<modulus<uint64_t,V>,W>)
00146 == sizeof(uint64_t); };
00147 #endif // __SSE2__
00148
00149
00150
00151
00152
00153 struct unknown_simd_op_mod {};
00154
00155 template<typename Op,typename C,nat s>
00156 struct simd_op_mod_helper { typedef unknown_simd_op_mod type; };
00157
00158 template<typename VV, typename VW, typename Op,
00159 typename C, typename V, typename W>
00160 struct vec_unary_simd_helper <true,VV,VW,Op,Modular> {
00161 static inline void op (Modular* dest, const Modular* s, nat n) {
00162 typedef typename stdint_of_helper<C>::type S;
00163 typedef typename unsigned_of_helper<S>::type U;
00164 typedef typename Simd_type (U) simd_U;
00165 static const int k= V::template maximum_size_helper<C>::value;
00166 typedef typename simd_op_mod_helper<Op,U,k>::type simd_Op;
00167 typedef implementation<vector_abstractions,VW> SVec;
00168 static const nat m= Simd_size (U);
00169 Modulus _p= Modular::get_modulus ();
00170 simd_modulus_add_helper<simd_U> simd_p (_p.p);
00171 SVec::template vec_binary_scalar<simd_Op, simd_U, simd_U>
00172 ((simd_U*) dest, (const simd_U*) s, simd_p, n / m); }
00173 };
00174
00175 template<typename VV, typename VW, typename Op,
00176 typename C, typename V, typename W>
00177 struct vec_binary_simd_helper <true,VV,VW,Op,Modular> {
00178 static inline void op (Modular* dest, const Modular* s1,
00179 const Modular* s2, nat n) {
00180 typedef typename stdint_of_helper<C>::type S;
00181 typedef typename unsigned_of_helper<S>::type U;
00182 typedef typename Simd_type (U) simd_U;
00183 static const int k= V::template maximum_size_helper<C>::value;
00184 typedef typename simd_op_mod_helper<Op,U,k>::type simd_Op;
00185 typedef implementation<vector_abstractions,VW> SVec;
00186 static const nat m= Simd_size (U);
00187 Modulus _p= Modular::get_modulus ();
00188 simd_modulus_add_helper<simd_U> simd_p (_p.p);
00189 SVec::template vec_ternary_scalar<simd_Op, simd_U, simd_U, simd_U>
00190 ((simd_U*) dest, (const simd_U*) s1, (const simd_U*) s2,
00191 simd_p, n / m); }
00192 };
00193
00194 template<typename VV, typename VW, typename Op,
00195 typename C, typename V, typename W>
00196 struct vec_binary_scalar_simd_helper <true,VV,VW,Op,Modular> {
00197 static inline void op (Modular* dest, const Modular* s,
00198 const Modular& x, const nat n) {
00199 typedef typename stdint_of_helper<C>::type S;
00200 typedef typename unsigned_of_helper<S>::type U;
00201 typedef typename Simd_type (U) simd_U;
00202 static const int k= V::template maximum_size_helper<C>::value;
00203 typedef typename simd_op_mod_helper<Op,U,k>::type simd_Op;
00204 typedef implementation<vector_abstractions,VW> SVec;
00205 static const nat m= Simd_size (U);
00206 Modulus _p= Modular::get_modulus ();
00207 simd_modulus_add_helper<simd_U> simd_p (_p.p);
00208 simd_U vx= simd_set_duplicate (*x);
00209 SVec::template vec_ternary_scalar_scalar<simd_Op, simd_U, simd_U, simd_U>
00210 ((simd_U*) dest, (const simd_U*) s, vx, simd_p, n / m); }
00211 };
00212
00213
00214
00215
00216
00217 #ifdef __SSE4_2__
00218
00219 struct simd_neg_mod_op {
00220 static generic name () { return "simd_minus_mod"; }
00221 template<typename C> static inline C
00222 op (const C& x, const simd_modulus_add_helper<C>& p) {
00223 C y= -x;
00224 return min (y, p.p + y); }
00225 static inline sse_uint64_t
00226 op (const sse_uint64_t& x,
00227 const simd_modulus_add_helper<sse_uint64_t>& p) {
00228 return (p.p & (~ simd_equal (x, _zero_uint64))) - x; }
00229 template<typename C> static inline void
00230 set_op (C& x, const simd_modulus_add_helper<C>& p) { x= op (x, p); }
00231 template<typename C> static inline void
00232 set_op (C& x, const C& y, const simd_modulus_add_helper<C>& p) {
00233 x= op (y, p); }
00234 };
00235
00236 template<typename C,nat s>
00237 struct simd_op_mod_helper<neg_op,C,s> { typedef simd_neg_mod_op type; };
00238
00239 template<typename V, typename W>
00240 struct sse_has_helper<neg_op, modular<modulus<int8_t,V>,W> > {
00241 static const bool value= sizeof(modular<modulus<int8_t,V>,W>)
00242 == sizeof(int8_t); };
00243 template<typename V, typename W>
00244 struct sse_has_helper<neg_op, modular<modulus<uint8_t,V>,W> > {
00245 static const bool value= sizeof(modular<modulus<uint8_t,V>,W>)
00246 == sizeof(uint8_t); };
00247 template<typename V, typename W>
00248 struct sse_has_helper<neg_op, modular<modulus<int16_t,V>,W> > {
00249 static const bool value= sizeof(modular<modulus<int16_t,V>,W>)
00250 == sizeof(int16_t); };
00251 template<typename V, typename W>
00252 struct sse_has_helper<neg_op, modular<modulus<uint16_t,V>,W> > {
00253 static const bool value= sizeof(modular<modulus<uint16_t,V>,W>)
00254 == sizeof(uint16_t); };
00255 template<typename V, typename W>
00256 struct sse_has_helper<neg_op, modular<modulus<int32_t,V>,W> > {
00257 static const bool value= sizeof(modular<modulus<int32_t,V>,W>)
00258 == sizeof(int32_t); };
00259 template<typename V, typename W>
00260 struct sse_has_helper<neg_op, modular<modulus<uint32_t,V>,W> > {
00261 static const bool value= sizeof(modular<modulus<uint32_t,V>,W>)
00262 == sizeof(uint32_t); };
00263 template<typename V, typename W>
00264 struct sse_has_helper<neg_op, modular<modulus<int64_t,V>,W> > {
00265 static const bool value= sizeof(modular<modulus<int64_t,V>,W>)
00266 == sizeof(int64_t); };
00267 template<typename V, typename W>
00268 struct sse_has_helper<neg_op, modular<modulus<uint64_t,V>,W> > {
00269 static const bool value= sizeof(modular<modulus<uint64_t,V>,W>)
00270 == sizeof(uint64_t); };
00271
00272 #endif // __SSE4_2__
00273
00274
00275
00276
00277
00278 #ifdef __SSE4_2__
00279
00280 struct simd_add_mod_without_overflow_op {
00281 static generic name () { return "simd_plus_mod"; }
00282 template<typename C> static inline C
00283 op (const C& x, const C& y, const simd_modulus_add_helper<C>& p) {
00284 const C d = x + y;
00285 return min (d, d - p.p); }
00286 static inline sse_uint64_t
00287 op (const sse_uint64_t& x, const sse_uint64_t& y,
00288 const simd_modulus_add_helper<sse_uint64_t>& p) {
00289 const sse_uint64_t d = x + y - p.p;
00290 return d + ((sse_uint64_t) simd_less ((sse_int64_t) (__m128i) d,
00291 (sse_int64_t) (__m128i) _zero_uint64)
00292 & p.p); }
00293 template<typename C> static inline void
00294 set_op (C& x, const C& y, const simd_modulus_add_helper<C>& p) {
00295 x= op (x, y, p); }
00296 template<typename C> static inline void
00297 set_op (C& x, const C& y, const C&z,
00298 const simd_modulus_add_helper<C>& p) {
00299 x= op (y, z, p); }
00300 };
00301
00302 struct simd_add_mod_with_overflow_op {
00303 static generic name () { return "simd_plus_mod"; }
00304 template<typename C> static inline C
00305 op (const C& x, const C&y, const simd_modulus_add_helper<C>& p) {
00306 C d= x + y;
00307 return d - ((simd_gtr (x, d) | simd_gtr (d, p.p_minus_1)) & p.p); }
00308 template<typename C> static inline void
00309 set_op (C& x, const C& y, const simd_modulus_add_helper<C>& p) {
00310 x= op (x, y, p); }
00311 template<typename C> static inline void
00312 set_op (C& x, const C& y, const C&z,
00313 const simd_modulus_add_helper<C>& p) {
00314 x= op (y, z, p); }
00315 };
00316
00317 template<typename C, bool b>
00318 struct simd_add_mod_op_helper {
00319 typedef simd_add_mod_with_overflow_op type; };
00320
00321 template<typename C>
00322 struct simd_add_mod_op_helper<C,true> {
00323 typedef simd_add_mod_without_overflow_op type; };
00324
00325 template<typename C,nat s>
00326 struct simd_op_mod_helper<add_op,C,s> :
00327 simd_add_mod_op_helper<C,s != 8*sizeof(C) > {};
00328
00329 template<typename V, typename W>
00330 struct sse_has_helper<add_op, modular<modulus<int8_t,V>,W> > {
00331 static const bool value= sizeof(modular<modulus<int8_t,V>,W>)
00332 == sizeof(int8_t); };
00333 template<typename V, typename W>
00334 struct sse_has_helper<add_op, modular<modulus<uint8_t,V>,W> > {
00335 static const bool value= sizeof(modular<modulus<uint8_t,V>,W>)
00336 == sizeof(uint8_t); };
00337 template<typename V, typename W>
00338 struct sse_has_helper<add_op, modular<modulus<int16_t,V>,W> > {
00339 static const bool value= sizeof(modular<modulus<int16_t,V>,W>)
00340 == sizeof(int16_t); };
00341 template<typename V, typename W>
00342 struct sse_has_helper<add_op, modular<modulus<uint16_t,V>,W> > {
00343 static const bool value= sizeof(modular<modulus<uint16_t,V>,W>)
00344 == sizeof(uint16_t); };
00345 template<typename V, typename W>
00346 struct sse_has_helper<add_op, modular<modulus<int32_t,V>,W> > {
00347 static const bool value= sizeof(modular<modulus<int32_t,V>,W>)
00348 == sizeof(int32_t); };
00349 template<typename V, typename W>
00350 struct sse_has_helper<add_op, modular<modulus<uint32_t,V>,W> > {
00351 static const bool value= sizeof(modular<modulus<uint32_t,V>,W>)
00352 == sizeof(uint32_t); };
00353 template<typename V, typename W>
00354 struct sse_has_helper<add_op, modular<modulus<int64_t,V>,W> > {
00355 static const bool value= sizeof(modular<modulus<int64_t,V>,W>)
00356 == sizeof(int64_t); };
00357 template<typename V, typename W>
00358 struct sse_has_helper<add_op, modular<modulus<uint64_t,V>,W> > {
00359 static const bool value= sizeof(modular<modulus<uint64_t,V>,W>)
00360 == sizeof(uint64_t); };
00361 #endif // __SSE4_2__
00362
00363
00364
00365
00366
00367 #ifdef __SSE4_2__
00368
00369 struct simd_sub_mod_without_overflow_op {
00370 static generic name () { return "simd_minus_mod"; }
00371 template<typename C> static inline C
00372 op (const C& x, const C&y, const simd_modulus_add_helper<C>& p) {
00373 typedef typename Simd_base_type(C) U;
00374 typedef typename signed_of_helper<U>::type I;
00375 typedef typename Simd_type(I) simd_I;
00376 return (simd_less ((simd_I) x, (simd_I) y) & p.p) + x - y; }
00377 template<typename C> static inline void
00378 set_op (C& x, const C& y, const simd_modulus_add_helper<C>& p) {
00379 x= op (x, y, p); }
00380 template<typename C> static inline void
00381 set_op (C& x, const C& y, const C&z,
00382 const simd_modulus_add_helper<C>& p) {
00383 x= op (y, z, p); }
00384 };
00385
00386 struct simd_sub_mod_with_overflow_op {
00387 static generic name () { return "simd_minus_mod"; }
00388 template<typename C> static inline C
00389 op (const C& x, const C&y, const simd_modulus_add_helper<C>& p) {
00390 return (simd_less (x, y) & p.p) + x - y; }
00391 template<typename C> static inline void
00392 set_op (C& x, const C& y, const simd_modulus_add_helper<C>& p) {
00393 x= op (x, y, p); }
00394 template<typename C> static inline void
00395 set_op (C& x, const C& y, const C&z,
00396 const simd_modulus_add_helper<C>& p) {
00397 x= op (y, z, p); }
00398 };
00399
00400 template<typename C, bool b>
00401 struct simd_sub_mod_op_helper {
00402 typedef simd_sub_mod_with_overflow_op type; };
00403
00404 template<typename C>
00405 struct simd_sub_mod_op_helper<C,true> {
00406 typedef simd_sub_mod_without_overflow_op type; };
00407
00408 template<typename C,nat s>
00409 struct simd_op_mod_helper<sub_op,C,s> :
00410 simd_sub_mod_op_helper<C,s != 8*sizeof(C) > {};
00411
00412 template<typename V, typename W>
00413 struct sse_has_helper<sub_op, modular<modulus<int8_t,V>,W> > {
00414 static const bool value= sizeof(modular<modulus<int8_t,V>,W>)
00415 == sizeof(int8_t); };
00416 template<typename V, typename W>
00417 struct sse_has_helper<sub_op, modular<modulus<uint8_t,V>,W> > {
00418 static const bool value= sizeof(modular<modulus<uint8_t,V>,W>)
00419 == sizeof(uint8_t); };
00420 template<typename V, typename W>
00421 struct sse_has_helper<sub_op, modular<modulus<int16_t,V>,W> > {
00422 static const bool value= sizeof(modular<modulus<int16_t,V>,W>)
00423 == sizeof(int16_t); };
00424 template<typename V, typename W>
00425 struct sse_has_helper<sub_op, modular<modulus<uint16_t,V>,W> > {
00426 static const bool value= sizeof(modular<modulus<uint16_t,V>,W>)
00427 == sizeof(uint16_t); };
00428 template<typename V, typename W>
00429 struct sse_has_helper<sub_op, modular<modulus<int32_t,V>,W> > {
00430 static const bool value= sizeof(modular<modulus<int32_t,V>,W>)
00431 == sizeof(int32_t); };
00432 template<typename V, typename W>
00433 struct sse_has_helper<sub_op, modular<modulus<uint32_t,V>,W> > {
00434 static const bool value= sizeof(modular<modulus<uint32_t,V>,W>)
00435 == sizeof(uint32_t); };
00436 template<typename V, typename W>
00437 struct sse_has_helper<sub_op, modular<modulus<int64_t,V>,W> > {
00438 static const bool value= sizeof(modular<modulus<int64_t,V>,W>)
00439 == sizeof(int64_t); };
00440 template<typename V, typename W>
00441 struct sse_has_helper<sub_op, modular<modulus<uint64_t,V>,W> > {
00442 static const bool value= sizeof(modular<modulus<uint64_t,V>,W>)
00443 == sizeof(uint64_t); };
00444 #endif // __SSE4_2__
00445
00446
00447
00448
00449
00450 #ifdef __SSE4_2__
00451
00452 #undef Modulus
00453 #define Modulus modulus<C,modulus_int_preinverse<k> >
00454
00455 template<typename VV, typename VW,
00456 typename C, nat k, typename W>
00457 struct vec_binary_simd_helper <true,VV,VW,mul_op,Modular> {
00458 static inline void op (Modular* dest, const Modular* s1,
00459 const Modular* s2, nat n) {
00460 typedef typename unsigned_of_helper<C>::type U;
00461 typedef typename stdint_of_helper<typename
00462 unsigned_int_with_double_size_helper<U>::type>::type L;
00463 typedef typename Simd_type (U) simd_U;
00464 typedef typename simd_op_mod_helper<mul_op,U,k>::type simd_Op;
00465 typedef implementation<vector_abstractions,VW> SVec;
00466 static const nat m= Simd_size (U);
00467 Modulus _p= Modular::get_modulus ();
00468 simd_modulus_mul_helper<simd_U> simd_p (_p.p, _p.q, _p.s, _p.t);
00469 SVec::template vec_ternary_scalar<simd_Op>
00470 ((simd_U*) dest, (const simd_U*) s1, (const simd_U*) s2,
00471 simd_p, n / m); }
00472 };
00473
00474 template<typename VV, typename VW,
00475 typename C, nat k, typename W>
00476 struct vec_binary_scalar_simd_helper <true,VV,VW,mul_op,Modular> {
00477 static inline void op (Modular* dest, const Modular* s,
00478 const Modular& x, nat n) {
00479 typedef typename unsigned_of_helper<C>::type U;
00480 typedef typename stdint_of_helper<typename
00481 unsigned_int_with_double_size_helper<U>::type>::type L;
00482 typedef typename Simd_type (U) simd_U;
00483 typedef typename simd_op_mod_helper<mul_op,U,k>::type simd_Op;
00484 typedef implementation<vector_abstractions,VW> SVec;
00485 static const nat m= Simd_size (U);
00486 Modulus _p= Modular::get_modulus ();
00487 simd_modulus_mul_helper<simd_U> simd_p (_p.p, _p.q, _p.s, _p.t);
00488 simd_U vx= simd_set_duplicate (*x);
00489 SVec::template vec_ternary_scalar_scalar<simd_Op>
00490 ((simd_U*) dest, (const simd_U*) s, vx, simd_p, n / m); }
00491 };
00492
00493 template<typename VV, typename VW,
00494 typename C, nat k, typename W>
00495 struct vec_binary_scalar_simd_helper <true,VV,VW,rmul_op,Modular> :
00496 vec_binary_scalar_simd_helper <true,VV,VW,mul_op,Modular> {};
00497
00498 template<typename VV, typename VW,
00499 typename C, nat k, typename W>
00500 struct vec_binary_scalar_simd_helper <true,VV,VW,lmul_op,Modular> :
00501 vec_binary_scalar_simd_helper <true,VV,VW,mul_op,Modular> {};
00502
00503 struct simd_mul_mod_with_half_of_free_bits_op {
00504 static generic name () { return "simd_times_mod"; }
00505 template<typename C> static inline C
00506 op (const C& x, const C& y, const simd_modulus_mul_helper<C>& p) {
00507 C r= x * y;
00508 C h= simd_srl (simd_srl (r, p.s) * p.q, p.t);
00509 r= r - h * p.p;
00510 return min (r, r - p.p); }
00511 template<typename C> static inline void
00512 set_op (C& x, const C& y, const simd_modulus_mul_helper<C>& p) {
00513 x= op (x, y, p); }
00514 template<typename C> static inline void
00515 set_op (C& x, const C& y, const C&z, const simd_modulus_mul_helper<C>& p) {
00516 x= op (y, z, p); }
00517 };
00518
00519 struct simd_mul_mod_with_two_free_bits_op {
00520 static generic name () { return "simd_times_mod"; }
00521 template<typename C> static inline C
00522 op (const C& x, const C&y, const simd_modulus_mul_helper<C>& p) {
00523 typedef typename Simd_base_type(C) U;
00524 typedef typename stdint_of_helper<typename
00525 unsigned_int_with_double_size_helper<U>::type>::type L;
00526 typedef typename Simd_type(L) D;
00527 D u0= (D) (__m128i) simd_unpacklo (x, p.zero);
00528 D u1= (D) (__m128i) simd_unpackhi (x, p.zero);
00529 D v0= (D) (__m128i) simd_unpacklo (y, p.zero);
00530 D v1= (D) (__m128i) simd_unpackhi (y, p.zero);
00531 D r0= u0 * v0;
00532 D r1= u1 * v1;
00533 D h0= simd_srl (simd_srl (r0, p.s) * p.q_D, p.t);
00534 D h1= simd_srl (simd_srl (r1, p.s) * p.q_D, p.t);
00535 C a = simd_pack (r0 & p.lo_mask_D, r1 & p.lo_mask_D);
00536 C b = simd_pack (h0, h1);
00537 b= a - b * p.p;
00538 return min (b, b - p.p); }
00539
00540 static inline sse_uint8_t
00541 op (const sse_uint8_t& x, const sse_uint8_t&y,
00542 const simd_modulus_mul_helper<sse_uint8_t>& p) {
00543 typedef sse_uint8_t C;
00544 typedef Simd_base_type(C) U;
00545 typedef stdint_of_helper<
00546 unsigned_int_with_double_size_helper<U>::type>::type L;
00547 typedef Simd_type(L) D;
00548 D u0= (D) (__m128i) simd_unpacklo (x, p.zero);
00549 D u1= (D) (__m128i) simd_unpackhi (x, p.zero);
00550 D v0= (D) (__m128i) simd_unpacklo (y, p.zero);
00551 D v1= (D) (__m128i) simd_unpackhi (y, p.zero);
00552 D r0= u0 * v0;
00553 D r1= u1 * v1;
00554 D h0= simd_srl (simd_srl (r0, p.s) * p.q_D, p.t);
00555 D h1= simd_srl (simd_srl (r1, p.s) * p.q_D, p.t);
00556 r0 = r0 - h0 * p.p_D;
00557 r1 = r1 - h1 * p.p_D;
00558 C b= simd_pack (r0, r1);
00559 return min (b, b - p.p); }
00560
00561 static inline sse_uint32_t
00562 op (const sse_uint32_t& x, const sse_uint32_t&y,
00563 const simd_modulus_mul_helper<sse_uint32_t>& p) {
00564 typedef uint32_t U;
00565 typedef uint64_t L;
00566 typedef sse_uint32_t C;
00567 typedef sse_uint64_t D;
00568 D r0= (D) _mm_mul_epu32 ((__m128i) x, (__m128i) y);
00569 D h0= (D) _mm_mul_epu32 ((__m128i) simd_srl (r0, p.s), (__m128i) p.q);
00570 D u1= (D) _mm_srli_si128 ((__m128i) x, 4);
00571 D v1= (D) _mm_srli_si128 ((__m128i) y, 4);
00572 D r1= (D) _mm_mul_epu32 ((__m128i) u1, (__m128i) v1);
00573 D h1= (D) _mm_mul_epu32 ((__m128i) simd_srl (r1, p.s), (__m128i) p.q);
00574 h0= simd_srl (h0, p.t);
00575 h1= simd_srl (h1, p.t);
00576 C a = (C) _mm_blend_epi16 ((__m128i) r0,
00577 _mm_slli_si128 ((__m128i) r1, 4), 4 + 8 + 64 + 128);
00578 C b = ((C) (__m128i) h0) | ((C) _mm_slli_si128 ((__m128i) h1, 4));
00579 b= a - (C) _mm_mullo_epi32 ((__m128i) b, (__m128i) p.p);
00580 return min (b, b - p.p); }
00581
00582 template<typename C> static inline void
00583 set_op (C& x, const C& y, const simd_modulus_mul_helper<C>& p) {
00584 x= op (x, y, p); }
00585 template<typename C> static inline void
00586 set_op (C& x, const C& y, const C&z, const simd_modulus_mul_helper<C>& p) {
00587 x= op (y, z, p); }
00588 };
00589
00590 struct simd_mul_mod_with_one_free_bit_op {
00591 static generic name () { return "simd_times_mod"; }
00592 template<typename C> static inline C
00593 op (const C& x, const C&y, const simd_modulus_mul_helper<C>& p) {
00594 typedef typename Simd_base_type(C) U;
00595 typedef typename stdint_of_helper<typename
00596 unsigned_int_with_double_size_helper<U>::type>::type L;
00597 typedef typename Simd_type(L) D;
00598 D u0= (D) (__m128i) simd_unpacklo (x, p.zero);
00599 D u1= (D) (__m128i) simd_unpackhi (x, p.zero);
00600 D v0= (D) (__m128i) simd_unpacklo (y, p.zero);
00601 D v1= (D) (__m128i) simd_unpackhi (y, p.zero);
00602 D r0= u0 * v0;
00603 D r1= u1 * v1;
00604 D h0= simd_srl (simd_srl (r0, p.s) * p.q_D, p.t);
00605 D h1= simd_srl (simd_srl (r1, p.s) * p.q_D, p.t);
00606 D b0= r0 - h0 * p.p_D;
00607 D b1= r1 - h1 * p.p_D;
00608 b0= min (b0, b0 - p.p_D);
00609 b1= min (b1, b1 - p.p_D);
00610 C b = simd_pack (b0, b1);
00611 return min (b, b - p.p); }
00612
00613 static inline sse_uint8_t
00614 op (const sse_uint8_t& x, const sse_uint8_t&y,
00615 const simd_modulus_mul_helper<sse_uint8_t>& p) {
00616 typedef sse_uint8_t C;
00617 typedef Simd_base_type(C) U;
00618 typedef stdint_of_helper<
00619 unsigned_int_with_double_size_helper<U>::type>::type L;
00620 typedef Simd_type(L) D;
00621 D u0= (D) (__m128i) simd_unpacklo (x, p.zero);
00622 D u1= (D) (__m128i) simd_unpackhi (x, p.zero);
00623 D v0= (D) (__m128i) simd_unpacklo (y, p.zero);
00624 D v1= (D) (__m128i) simd_unpackhi (y, p.zero);
00625 D r0= u0 * v0;
00626 D r1= u1 * v1;
00627 D h0= simd_srl (simd_srl (r0, p.s) * p.q_D, p.t);
00628 D h1= simd_srl (simd_srl (r1, p.s) * p.q_D, p.t);
00629 r0 = r0 - h0 * p.p_D;
00630 r1 = r1 - h1 * p.p_D;
00631 r0 = min (r0, r0 - p.p_D);
00632 r1 = min (r1, r1 - p.p_D);
00633 C b= simd_pack (r0, r1);
00634 return min (b, b - p.p); }
00635
00636 static inline sse_uint32_t
00637 op (const sse_uint32_t& x, const sse_uint32_t&y,
00638 const simd_modulus_mul_helper<sse_uint32_t>& p) {
00639 typedef uint32_t U;
00640 typedef uint64_t L;
00641 typedef sse_uint32_t C;
00642 typedef sse_uint64_t D;
00643 D r0= (D) _mm_mul_epu32 ((__m128i) x, (__m128i) y);
00644 D h0= (D) _mm_mul_epu32 ((__m128i) simd_srl (r0, p.s), (__m128i) p.q_D);
00645
00646 h0= simd_srl (h0, p.t);
00647 D u1= (D) _mm_srli_si128 ((__m128i) x, 4);
00648 D v1= (D) _mm_srli_si128 ((__m128i) y, 4);
00649 D r1= (D) _mm_mul_epu32 ((__m128i) u1, (__m128i) v1);
00650 D h1= (D) _mm_mul_epu32 ((__m128i) simd_srl (r1, p.s), (__m128i) p.q_D);
00651
00652 h1= simd_srl (h1, p.t);
00653 D b0= r0 - (D) _mm_mul_epu32 ((__m128i) h0, (__m128i) p.p_D);
00654 D b1= r1 - (D) _mm_mul_epu32 ((__m128i) h1, (__m128i) p.p_D);
00655 b0= b0 - (((D) simd_gtr ((sse_int64_t) b0,
00656 (sse_int64_t) p.p_minus_1_D)) & p.p_D);
00657 b1= b1 - (((D) simd_gtr ((sse_int64_t) b1,
00658 (sse_int64_t) p.p_minus_1_D)) & p.p_D);
00659 C b = (C) _mm_blend_epi16 ((__m128i) b0,
00660 _mm_slli_si128 ((__m128i) b1, 4), 4 + 8 + 64 + 128);
00661 return min (b, b - p.p); }
00662
00663 template<typename C> static inline void
00664 set_op (C& x, const C& y, const simd_modulus_mul_helper<C>& p) {
00665 x= op (x, y, p); }
00666 template<typename C> static inline void
00667 set_op (C& x, const C& y, const C&z, const simd_modulus_mul_helper<C>& p) {
00668 x= op (y, z, p); }
00669 };
00670
00671 struct simd_mul_mod_op {
00672 static generic name () { return "simd_times_mod"; }
00673 template<typename C> static inline C
00674 op (const C& x, const C&y, const simd_modulus_mul_helper<C>& p) {
00675 typedef typename Simd_base_type(C) U;
00676 typedef typename stdint_of_helper<typename
00677 unsigned_int_with_double_size_helper<U>::type>::type L;
00678 typedef typename Simd_type(L) D;
00679 D u0= (D) (__m128i) simd_unpacklo (x, p.zero);
00680 D u1= (D) (__m128i) simd_unpackhi (x, p.zero);
00681 D v0= (D) (__m128i) simd_unpacklo (y, p.zero);
00682 D v1= (D) (__m128i) simd_unpackhi (y, p.zero);
00683 D r0= u0 * v0;
00684 D r1= u1 * v1;
00685 D h0= simd_srl (simd_srl (r0, p.s) * p.q_D, p.t);
00686 D h1= simd_srl (simd_srl (r1, p.s) * p.q_D, p.t);
00687 D b0= r0 - h0 * p.p_D;
00688 D b1= r1 - h1 * p.p_D;
00689 b0= min (b0, b0 - p.p_D);
00690 b1= min (b1, b1 - p.p_D);
00691 b0= min (b0, b0 - p.p_D);
00692 b1= min (b1, b1 - p.p_D);
00693 C b = simd_pack (b0, b1);
00694 return min (b, b - p.p); }
00695
00696 static inline sse_uint8_t
00697 op (const sse_uint8_t& x, const sse_uint8_t&y,
00698 const simd_modulus_mul_helper<sse_uint8_t>& p) {
00699 typedef sse_uint8_t C;
00700 typedef Simd_base_type(C) U;
00701 typedef stdint_of_helper<
00702 unsigned_int_with_double_size_helper<U>::type>::type L;
00703 typedef Simd_type(L) D;
00704 D u0= (D) (__m128i) simd_unpacklo (x, p.zero);
00705 D u1= (D) (__m128i) simd_unpackhi (x, p.zero);
00706 D v0= (D) (__m128i) simd_unpacklo (y, p.zero);
00707 D v1= (D) (__m128i) simd_unpackhi (y, p.zero);
00708 D r0= u0 * v0;
00709 D r1= u1 * v1;
00710 D h0= simd_srl (simd_srl (r0, p.s) * p.q_D, p.t);
00711 D h1= simd_srl (simd_srl (r1, p.s) * p.q_D, p.t);
00712 r0 = r0 - h0 * p.p_D;
00713 r1 = r1 - h1 * p.p_D;
00714 r0 = min (r0, r0 - p.p_D);
00715 r1 = min (r1, r1 - p.p_D);
00716 r0 = min (r0, r0 - p.p_D);
00717 r1 = min (r1, r1 - p.p_D);
00718 C b= simd_pack (r0, r1);
00719 return min (b, b - p.p); }
00720
00721 static inline sse_uint32_t
00722 op (const sse_uint32_t& x, const sse_uint32_t&y,
00723 const simd_modulus_mul_helper<sse_uint32_t>& p) {
00724 typedef uint32_t U;
00725 typedef uint64_t L;
00726 typedef sse_uint32_t C;
00727 typedef sse_int64_t D;
00728 D r0= (D) _mm_mul_epu32 ((__m128i) x, (__m128i) y);
00729 D h0= (D) _mm_mul_epu32 ((__m128i) simd_srl (r0, p.s), (__m128i) p.q_D);
00730
00731 h0= simd_srl (h0, p.t);
00732 D b0= r0 - (D) _mm_mul_epu32 ((__m128i) h0, (__m128i) p.p_D);
00733 b0= b0 - (D) (((D) (__m128i) simd_gtr ((sse_int64_t) (__m128i) b0,
00734 (sse_int64_t) (__m128i) p.p_minus_1_D)) & p.p_D);
00735 b0= b0 - (D) (((D) (__m128i) simd_gtr ((sse_int64_t) (__m128i) b0,
00736 (sse_int64_t) (__m128i) p.p_minus_1_D)) & p.p_D);
00737 D u1= (D) _mm_srli_si128 ((__m128i) x, 4);
00738 D v1= (D) _mm_srli_si128 ((__m128i) y, 4);
00739 D r1= (D) _mm_mul_epu32 ((__m128i) u1, (__m128i) v1);
00740 D h1= (D) _mm_mul_epu32 ((__m128i) simd_srl (r1, p.s), (__m128i) p.q_D);
00741
00742 h1= simd_srl (h1, p.t);
00743 D b1= r1 - (D) _mm_mul_epu32 ((__m128i) h1, (__m128i) p.p_D);
00744 b1= b1 - (D) (((D) (__m128i) simd_gtr ((sse_int64_t) (__m128i) b1,
00745 (sse_int64_t) (__m128i) p.p_minus_1_D)) & p.p_D);
00746 b1= b1 - (D) (((D) (__m128i) simd_gtr ((sse_int64_t) (__m128i) b1,
00747 (sse_int64_t) (__m128i) p.p_minus_1_D)) & p.p_D);
00748 C b = (C) (__m128i) (b0 | simd_sll (b1, 32));
00749 return min (b, b - p.p); }
00750
00751 template<typename C> static inline void
00752 set_op (C& x, const C& y, const simd_modulus_mul_helper<C>& p) {
00753 x= op (x, y, p); }
00754 template<typename C> static inline void
00755 set_op (C& x, const C& y, const C&z, const simd_modulus_mul_helper<C>& p) {
00756 x= op (y, z, p); }
00757 };
00758
00759 template<typename C, int b>
00760 struct simd_mul_mod_op_helper {
00761 typedef simd_mul_mod_op type; };
00762
00763 template<typename C>
00764 struct simd_mul_mod_op_helper<C,1> {
00765 typedef simd_mul_mod_with_one_free_bit_op type; };
00766
00767 template<typename C>
00768 struct simd_mul_mod_op_helper<C,2> {
00769 typedef simd_mul_mod_with_two_free_bits_op type; };
00770
00771 template<typename C>
00772 struct simd_mul_mod_op_helper<C,3> {
00773 typedef simd_mul_mod_with_half_of_free_bits_op type; };
00774
00775 template<nat s>
00776 struct simd_op_mod_helper<mul_op,uint8_t,s> :
00777 simd_mul_mod_op_helper<uint8_t, s <= 6 ? 2 : s <= 7 ? 1 : 0> {};
00778
00779 template<nat s>
00780 struct simd_op_mod_helper<mul_op,int8_t,s> :
00781 simd_mul_mod_op_helper<uint8_t, s <= 6 ? 2 : s <= 7 ? 1 : 0> {};
00782
00783 template<typename C,nat s>
00784 struct simd_op_mod_helper<mul_op,C,s> :
00785 simd_mul_mod_op_helper<C, s + 2 <= 4*sizeof(C) ? 3 :
00786 s + 2 <= 8*sizeof(C) ? 2 :
00787 s + 1 <= 8*sizeof(C) ? 1 : 0> {};
00788
00789 template<typename C,nat s>
00790 struct simd_op_mod_helper<lmul_op,C,s> :
00791 public simd_op_mod_helper<mul_op,C,s> {};
00792
00793 template<typename C,nat s>
00794 struct simd_op_mod_helper<rmul_op,C,s> :
00795 public simd_op_mod_helper<mul_op,C,s> {};
00796
00797 template<typename V, typename W>
00798 struct sse_has_helper<mul_op, modular<modulus<int8_t,V>,W> > {
00799 static const bool value= sizeof(modular<modulus<int8_t,V>,W>)
00800 == sizeof(int8_t); };
00801 template<typename V, typename W>
00802 struct sse_has_helper<mul_op, modular<modulus<uint8_t,V>,W> > {
00803 static const bool value= sizeof(modular<modulus<uint8_t,V>,W>)
00804 == sizeof(uint8_t); };
00805 template<typename V, typename W>
00806 struct sse_has_helper<mul_op, modular<modulus<int16_t,V>,W> > {
00807 static const bool value= sizeof(modular<modulus<int16_t,V>,W>)
00808 == sizeof(int16_t); };
00809 template<typename V, typename W>
00810 struct sse_has_helper<mul_op, modular<modulus<uint16_t,V>,W> > {
00811 static const bool value= sizeof(modular<modulus<uint16_t,V>,W>)
00812 == sizeof(uint16_t); };
00813 template<typename V, typename W>
00814 struct sse_has_helper<mul_op, modular<modulus<int32_t,V>,W> > {
00815 static const bool value= sizeof(modular<modulus<int32_t,V>,W>)
00816 == sizeof(int32_t); };
00817 template<typename V, typename W>
00818 struct sse_has_helper<mul_op, modular<modulus<uint32_t,V>,W> > {
00819 static const bool value= sizeof(modular<modulus<uint32_t,V>,W>)
00820 == sizeof(uint32_t); };
00821 #endif // __SSE4_2__
00822
00823 #undef Modulus
00824 #undef Modular
00825 }
00826 #endif // NUMERIX_ENABLE_SIMD && ALGEBRAMIX_ENABLE_SIMD
00827 #endif // __MMX__MODULAR_SIMD__HPP