00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef __MMX__MATRIX_MODULAR_INT__HPP
00014 #define __MMX__MATRIX_MODULAR_INT__HPP
00015 #include <numerix/modular_int.hpp>
00016 #include <algebramix/matrix_int.hpp>
00017 #include <algebramix/matrix_double.hpp>
00018 #include <algebramix/matrix_modular.hpp>
00019
00020 namespace mmx {
00021 #define TMPL template<typename C, typename V1, typename V2>
00022 #define Modulus modulus<C,V1>
00023 #define Modular modular<Modulus,V2>
00024
00025
00026
00027
00028
00029 typedef matrix_unrolled<4,matrix_unrolled<4,matrix_naive> >
00030 matrix_unrolled_4_4;
00031
00032 DEFINE_VARIANT (matrix_modular_int,
00033 matrix_strassen<
00034 matrix_threads<
00035 matrix_unrolled_4_4> >)
00036
00037 #define DECLARE_HELPER(I,V1) \
00038 template<typename V2, nat m> \
00039 struct matrix_variant_helper<modular<modulus<I,V1>,V2> > { \
00040 typedef matrix_naive Naive; \
00041 typedef matrix_unrolled<4,matrix_unrolled<2,Naive> > Unrolled; \
00042 typedef typename Matrix_simd_variant(I) Simd; \
00043 typedef matrix_modular_int MV; \
00044 };
00045
00046 DECLARE_HELPER(signed char, modulus_int_naive<m>)
00047 DECLARE_HELPER(signed char, modulus_int_preinverse<m>)
00048 DECLARE_HELPER(unsigned char, modulus_int_naive<m>)
00049 DECLARE_HELPER(unsigned char, modulus_int_preinverse<m>)
00050 DECLARE_HELPER(short int, modulus_int_naive<m>)
00051 DECLARE_HELPER(short int, modulus_int_preinverse<m>)
00052 DECLARE_HELPER(unsigned short int, modulus_int_naive<m>)
00053 DECLARE_HELPER(unsigned short int, modulus_int_preinverse<m>)
00054 DECLARE_HELPER(int, modulus_int_naive<m>)
00055 DECLARE_HELPER(int, modulus_int_preinverse<m>)
00056 DECLARE_HELPER(unsigned int, modulus_int_naive<m>)
00057 DECLARE_HELPER(unsigned int, modulus_int_preinverse<m>)
00058 DECLARE_HELPER(long int, modulus_int_naive<m>)
00059 DECLARE_HELPER(long int, modulus_int_preinverse<m>)
00060 DECLARE_HELPER(unsigned long int, modulus_int_naive<m>)
00061 DECLARE_HELPER(unsigned long int, modulus_int_preinverse<m>)
00062 DECLARE_HELPER(long long int, modulus_int_naive<m>)
00063 DECLARE_HELPER(long long int, modulus_int_preinverse<m>)
00064 DECLARE_HELPER(unsigned long long int, modulus_int_naive<m>)
00065 DECLARE_HELPER(unsigned long long int, modulus_int_preinverse<m>)
00066 #undef DECLARE_HELPER
00067
00068
00069
00070
00071
00072 template<typename D>
00073 struct free_bits_helper {
00074 static const nat value= 8 * sizeof (D);
00075 };
00076
00077 template<typename V, typename D, typename C, typename V1, typename V2> void
00078 encode_modular_int (D* dest, const Modular* src,
00079 nat r, nat rr, nat c, nat cc)
00080 {
00081 typedef implementation<matrix_linear,V> Mat;
00082 nat dest_rs= Mat::index (1, 0, r, c);
00083 nat dest_cs= Mat::index (0, 1, r, c);
00084 nat src_rs = Mat::index (1, 0, rr, cc);
00085 nat src_cs = Mat::index (0, 1, rr, cc);
00086 D* dest_row= dest;
00087 const Modular* src_row= src;
00088 for (nat i=0; i<r; i++, dest_row += dest_rs, src_row += src_rs) {
00089 D* dest_col= dest_row;
00090 const Modular* src_col= src_row;
00091 for (nat j=0; j<c; j++, dest_col += dest_cs, src_col += src_cs)
00092 *dest_col= (D) (*(*src_col));
00093 }
00094 }
00095
00096 template<typename Op, typename V, typename C, typename D,
00097 typename V1, typename V2> void
00098 decode_modular_int (Modular* dest, const D* src,
00099 nat r, nat rr, nat c, nat cc)
00100 {
00101 typedef implementation<matrix_linear,V> Mat;
00102 typedef typename Op::nomul_op Set;
00103 C p = *Modular::get_modulus ();
00104 nat dest_rs= Mat::index (1, 0, rr, cc);
00105 nat dest_cs= Mat::index (0, 1, rr, cc);
00106 nat src_rs = Mat::index (1, 0, r, c);
00107 nat src_cs = Mat::index (0, 1, r, c);
00108 Modular* dest_row= dest;
00109 const D* src_row= src;
00110 for (nat i=0; i<r; i++, dest_row += dest_rs, src_row += src_rs) {
00111 Modular* dest_col= dest_row;
00112 const D* src_col= src_row;
00113 for (nat j=0; j<c; j++, dest_col += dest_cs, src_col += src_cs)
00114 Set::set_op (*dest_col, Modular ((*src_col) % p, true));
00115 }
00116 }
00117
00118 #if defined(NUMERIX_ENABLE_SIMD)\
00119 && defined(ALGEBRAMIX_ENABLE_SIMD)\
00120 && defined(__SSE2__)
00121
00122 template<typename Op, typename V,
00123 typename C, typename V1, typename V2> void
00124 decode_modular_int (Modular* dest, const double* src,
00125 nat r, nat rr, nat c, nat cc)
00126 {
00127 typedef implementation<matrix_linear,V> Mat;
00128 typedef typename Op::nomul_op Set;
00129 uint32_t p = (uint32_t) *Modular::get_modulus ();
00130 nat dest_rs= Mat::index (1, 0, rr, cc);
00131 nat dest_cs= Mat::index (0, 1, rr, cc);
00132 nat src_rs = Mat::index (1, 0, r, c);
00133 nat src_cs = Mat::index (0, 1, r, c);
00134 Modular* dest_row= dest;
00135 const double* src_row= src;
00136 for (nat i=0; i<r; i++, dest_row += dest_rs, src_row += src_rs) {
00137 Modular* dest_col= dest_row;
00138 const double* src_col= src_row;
00139 for (nat j=0; j<c; j++, dest_col += dest_cs, src_col += src_cs)
00140 Set::set_op (*dest_col, Modular (((uint64_t) (*src_col)) % p, true));
00141 }
00142 }
00143 STMPL
00144 struct free_bits_helper<double> {
00145 static const nat value= 53;
00146 };
00147 #endif // __SSE2__
00148
00149
00150 template<typename V>
00151 struct implementation<matrix_multiply_base,V,matrix_modular_int>:
00152 public implementation<matrix_linear,V>
00153 {
00154 typedef matrix_strassen<matrix_threads<matrix_unrolled_4_4> > W;
00155 typedef implementation<matrix_multiply,W> Fallback;
00156 typedef implementation<matrix_linear,W> Mat;
00157 private:
00158
00159 #if defined(NUMERIX_ENABLE_SIMD)\
00160 && defined(ALGEBRAMIX_ENABLE_SIMD)\
00161 && defined(__SSE2__)
00162 template<nat s>
00163 struct lifting_int_with_size_at_least_helper {
00164 template<bool b, typename T, typename E>
00165 struct if_helper { typedef T ret; };
00166 template<typename T, typename E>
00167 struct if_helper<false,T,E> { typedef E ret; };
00168 typedef
00169 typename if_helper<8*sizeof(short unsigned int) >= s, short unsigned int,
00170 typename if_helper<53 >= s, double,
00171 long long unsigned int>::ret >::ret type;
00172 };
00173
00174 #else
00175 template<nat s>
00176 struct lifting_int_with_size_at_least_helper {
00177 template<bool b, typename T, typename E>
00178 struct if_helper { typedef T ret; };
00179 template<typename T, typename E>
00180 struct if_helper<false,T,E> { typedef E ret; };
00181 typedef
00182 typename if_helper<8*sizeof(unsigned char)>=s, unsigned char,
00183 typename if_helper<8*sizeof(short unsigned int)>=s, short unsigned int,
00184 typename if_helper<8*sizeof(unsigned int)>=s, unsigned int,
00185 typename if_helper<8*sizeof(long unsigned int)>=s, long unsigned int,
00186 long long unsigned int>::ret >::ret >::ret >::ret type ;
00187 };
00188 #endif
00189
00190 template<typename Op, typename C, typename V1, typename V2>
00191 static inline void
00192 _mul_int (Modular* d, const Modular* s1, const Modular* s2,
00193 nat r, nat rr, nat l, nat ll, nat c, nat cc)
00194 {
00195 static const nat modulus_size= V1::template maximum_size_helper<C>::value;
00196
00197 static const nat required_size= (modulus_size << 1) + 2;
00198 typedef typename
00199 lifting_int_with_size_at_least_helper<required_size>::type D;
00200
00201 static const nat possible_free_bits=
00202 free_bits_helper<D>::value - (modulus_size << 1);
00203 static const nat free_bits= possible_free_bits >= 16
00204 ? 16 : possible_free_bits;
00205 const nat ul= MMX_SAFE_LEFT_SHIFT_INT(nat,1,free_bits);
00206 typedef typename Matrix_variant(D) W;
00207 typedef implementation<matrix_multiply,W> Mat_D;
00208
00209 if (free_bits == 0) {
00210 Fallback::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00211 }
00212 else if (l <= ul) {
00213 nat len_1= aligned_size<D,W> (r * l);
00214 nat len_2= aligned_size<D,W> (l * c);
00215 nat len_d= aligned_size<D,W> (r * c);
00216 nat spc = len_1 + len_2 + len_d;
00217 D* x1= mmx_new<D> (spc);
00218 D* x2= x1 + len_1;
00219 D* xd= x2 + len_2;
00220 encode_modular_int<W> (x1, s1, r, rr, l, ll);
00221 encode_modular_int<W> (x2, s2, l, ll, c, cc);
00222 Mat_D::template mul<Op> (xd, x1, x2, r, r, l, l, c, c);
00223 decode_modular_int<Op,W> (d, xd, r, rr, c, cc);
00224 mmx_delete<D> (x1, spc);
00225 }
00226 else {
00227 typedef typename Op::acc_op Acc;
00228 const nat ur= 32;
00229 const nat uc= 32;
00230
00231 nat nr= (r + ur - 1) / ur;
00232 nat nl= (l + ul - 1) / ul;
00233 nat nc= (c + uc - 1) / uc;
00234 for (nat ir=0; ir<nr; ir++)
00235 for (nat ic=0; ic<nc; ic++) {
00236 nat il=0;
00237 for (; il<1; il++) {
00238 mul<Op > (d + Mat::index (ir*ur, ic*uc, rr, cc),
00239 s1 + Mat::index (ir*ur, il*ul, rr, ll),
00240 s2 + Mat::index (il*ul, ic*uc, ll, cc),
00241 min (ur, r - ir*ur), rr,
00242 min (ul, l - il*ul), ll,
00243 min (uc, c - ic*uc), cc);
00244 }
00245 for (; il<nl; il++) {
00246 mul<Acc> (d + Mat::index (ir*ur, ic*uc, rr, cc),
00247 s1 + Mat::index (ir*ur, il*ul, rr, ll),
00248 s2 + Mat::index (il*ul, ic*uc, ll, cc),
00249 min (ur, r - ir*ur), rr,
00250 min (ul, l - il*ul), ll,
00251 min (uc, c - ic*uc), cc);
00252 }
00253 }
00254 }
00255 }
00256
00257 public:
00258
00259 template<typename Op, typename C, typename V1, typename V2>
00260 static inline void
00261 mul (Modular* d, const Modular* s1, const Modular* s2,
00262 nat r, nat rr, nat l, nat ll, nat c, nat cc)
00263 {
00264 _mul_int<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00265 }
00266
00267 };
00268
00269 #undef TMPL
00270 #undef Modulus
00271 #undef Modular
00272 }
00273
00274 #endif //__MMX__MATRIX_MODULAR_INT__HPP