00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 #ifndef __MMX__MATRIX_NAIVE__HPP
00014 #define __MMX__MATRIX_NAIVE__HPP
00015 #include <basix/vector.hpp>
00016 
00017 namespace mmx {
00018 #define TMPL template<typename C>
00019 
00020 
00021 
00022 
00023 
00024 struct matrix_naive {
00025   typedef vector_naive Vec;        
00026   typedef matrix_naive Naive;      
00027   typedef matrix_naive Positive;   
00028   typedef matrix_naive No_aligned; 
00029   typedef matrix_naive No_simd;    
00030   typedef matrix_naive No_thread;  
00031   typedef matrix_naive No_scaled;  
00032 };
00033 
00034 template<typename C>
00035 struct matrix_variant_helper {
00036   typedef matrix_naive MV;
00037 };
00038 
00039 
00040 
00041 
00042 
00043 struct matrix_defaults {};
00044 
00045 template<typename V>
00046 struct implementation<matrix_defaults,V,matrix_naive> {
00047   static const nat def_rows = 0;
00048   static const nat def_cols = 0;
00049   static const nat init_rows= 1;
00050   static const nat init_cols= 1;
00051 }; 
00052 
00053 
00054 
00055 
00056 
00057 template<typename V>
00058 struct implementation<vector_defaults,V,matrix_naive>:
00059   public implementation<vector_defaults,V,typename V::Vec> {};
00060 
00061 template<typename V>
00062 struct implementation<vector_allocate,V,matrix_naive>:
00063   public implementation<vector_allocate,V,typename V::Vec> {};
00064 
00065 template<typename V>
00066 struct implementation<vector_abstractions,V,matrix_naive>:
00067   public implementation<vector_abstractions,V,typename V::Vec> {};
00068 
00069 template<typename V>
00070 struct implementation<vector_abstractions_stride,V,matrix_naive>:
00071   public implementation<vector_abstractions_stride,V,typename V::Vec> {};
00072 
00073 template<typename V>
00074 struct implementation<vector_linear,V,matrix_naive>:
00075   public implementation<vector_linear,V,typename V::Vec> {};
00076 
00077 template<typename C, typename V> inline nat
00078 aligned_size (nat r, nat c) {
00079   return aligned_size<C,V> (r * c); }
00080 
00081 template<typename C> inline nat
00082 default_aligned_size (nat r, nat c) {
00083   return default_aligned_size<C> (r * c); }
00084 
00085 
00086 
00087 
00088 
00089 struct matrix_vectorial {};
00090 
00091 template<typename V>
00092 struct implementation<matrix_vectorial,V,matrix_naive>:
00093   public implementation<matrix_defaults,V>
00094 {
00095   typedef implementation<vector_linear,V> Vec;
00096 
00097 static inline nat
00098 index (nat row, nat col, nat rows, nat cols) {
00099   (void) cols;
00100   return col * rows + row; }
00101 
00102 TMPL
00103 static inline const C&
00104 entry (const C* m, nat row, nat col, nat rows, nat cols) {
00105   return * (m + index (row, col, rows, cols)); }
00106 
00107 
00108 
00109 
00110 
00111 
00112 
00113 
00114 template<typename Op, typename T, typename C> static inline void
00115 mat_unary_stride (T* dest, nat dest_rs, nat dest_cs,
00116                   const C* src, nat src_rs, nat src_cs,
00117                   nat rows, nat cols) {
00118   for (; cols != 0; dest += dest_cs, src += src_cs, cols--)
00119     Vec::template vec_unary_stride<Op> (dest, dest_rs, src, src_rs, rows); }
00120 
00121 template<typename Op, typename T, typename C1, typename C2> static inline void
00122 mat_binary_stride (T* dest, nat dest_rs, nat dest_cs,
00123                    const C1* src1, nat src1_rs, nat src1_cs,
00124                    const C2* src2, nat src2_rs, nat src2_cs,
00125                    nat rows, nat cols) {
00126   for (; cols != 0; dest += dest_cs, src1 += src1_cs, src2 += src2_cs, cols--)
00127     Vec::template vec_binary_stride<Op>
00128       (dest, dest_rs, src1, src1_rs, src2, src2_rs, rows); }
00129 
00130 TMPL static inline void
00131 clear (C* dest, nat r, nat c) {
00132   Vec::clear (dest, r * c);
00133 }
00134 
00135 TMPL static void
00136 set (C* dest, const C& c, nat rows, nat cols) {
00137   clear (dest, rows, cols);
00138   nat m= min (rows, cols), plus= index (1, 1, rows, cols);
00139   while (m != 0) {
00140     *dest= c;
00141     dest += plus; m--; } }
00142 
00143 TMPL static void
00144 get_range (C* d, const C* s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
00145   nat rr= r2 - r1, cc = c2 - c1;
00146   mat_unary_stride<id_op>
00147     (d, index (1, 0, rr, cc), index (0, 1, rr, cc),
00148      s + index (r1, c1, r, c), index (1, 0, r, c), index (0, 1, r, c),
00149      rr, cc);
00150 }
00151 
00152 TMPL static void
00153 clear_range (C* d, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
00154   d += index (r1, c1, r, c);
00155   C* dd= d;
00156   nat rp= index (1, 0, r, c), cp= index (0, 1, r, c);
00157   for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd)
00158     for (nat i=0; i<(r2-r1); i++, d += rp)
00159       clear_op::set_op (*d);
00160 }
00161 
00162 TMPL static void
00163 set_range (C* d, const C& s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
00164   d += index (r1, c1, r, c);
00165   C* dd= d;
00166   nat rp= index (1, 0, r, c), cp= index (0, 1, r, c);
00167   for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd)
00168     for (nat i=0; i<(r2-r1); i++, d += rp)
00169       if (i == j) *d = s;
00170       else clear_op::set_op (*d);
00171 }
00172 
00173 TMPL static void
00174 copy (C* dest, const C* s, nat r, nat c) {
00175   Vec::copy (dest, s, r * c); }
00176 
00177 TMPL static void
00178 transpose (C* dest, const C* src, nat rows, nat cols) {
00179   mat_unary_stride<id_op>
00180     (dest, index (0, 1, cols, rows), index (1, 0, cols, rows),
00181      src,  index (1, 0, rows, cols), index (0, 1, rows, cols),
00182      rows, cols); }
00183 
00184 TMPL static inline void
00185 print (const port& out, const C* s, nat rows, nat cols) {
00186   out << "[ ";
00187   for (nat i=0; i < rows; i++) {
00188     for (nat j=0; j < cols; j++) {
00189       out << * (s + index (i, j, rows, cols));
00190       if (j + 1 != cols) out << ", ";
00191     }
00192     if (i + 1 != rows) out << ";" << lf;
00193   }
00194   out << " ]";
00195 }
00196 
00197 }; 
00198 
00199 
00200 
00201 
00202 
00203 struct matrix_linear {};
00204 
00205 template<typename V>
00206 struct implementation<matrix_linear,V,matrix_naive>:
00207   public implementation<matrix_vectorial,V>
00208 {
00209   typedef implementation<matrix_vectorial,V> Mat;
00210   typedef implementation<vector_linear,V> Vec;
00211 
00212 template<typename Op, typename C, typename X> static inline void
00213 col_unary_scalar (C* dest, const X& x, nat c1, nat r, nat rr, nat cc) {
00214   Vec::template vec_unary_scalar_stride<Op>
00215     (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc), x, r);
00216 }
00217 
00218 template<typename Op, typename C, typename X> static inline void
00219 col_binary_scalar (C* dest, const X& x, nat c1, nat c2, nat r,
00220                    nat rr, nat cc) {
00221   Vec::template vec_binary_scalar_stride<Op>
00222     (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc),
00223      dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), x, r);
00224 }
00225 
00226 template<typename Op, typename C> static inline void
00227 col_binary_combine (C* dest, nat c1, nat c2, nat r, nat rr, nat cc) {
00228   Vec::template vec_binary_combine_stride<Op>
00229     (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc),
00230      dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), r);
00231 }
00232 
00233 template<typename Op, typename C, typename X> static inline void
00234 row_unary_scalar (C* dest, const X& x, nat r1, nat c, nat rr, nat cc) {
00235   Vec::template vec_unary_scalar_stride<Op>
00236     (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c);
00237 }
00238 
00239 template<typename Op, typename C, typename X> static inline void
00240 row_binary_scalar (C* dest, const X& x, nat r1, nat r2, nat c,
00241                    nat rr, nat cc) {
00242   Vec::template vec_binary_scalar_stride<Op>
00243     (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc),
00244      dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c);
00245 }
00246 
00247 template<typename Op, typename C> static inline void
00248 row_binary_combine (C* dest, nat r1, nat r2, nat c, nat rr, nat cc) {
00249   Vec::template vec_binary_combine_stride<Op>
00250     (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc),
00251      dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), c);
00252 }
00253 
00254 TMPL static void col_mul (C* s, const C& sc, nat i, nat r, nat c) {
00255   ASSERT (i<c, "out of range");
00256   col_unary_scalar<rmul_op> (s, sc, i, r, r, c); }
00257 TMPL static void col_div (C* s, const C& sc, nat i, nat r, nat c) {
00258   ASSERT (i<c, "out of range");
00259   col_unary_scalar<rdiv_op> (s, sc, i, r, r, c); }
00260 TMPL static void row_mul (C* s, const C& sc, nat i, nat r, nat c) {
00261   ASSERT (i<r, "out of range");
00262   row_unary_scalar<rmul_op> (s, sc, i, c, r, c); }
00263 TMPL static void row_div (C* s, const C& sc, nat i, nat r, nat c) {
00264   ASSERT (i<r, "out of range");
00265   row_unary_scalar<rdiv_op> (s, sc, i, c, r, c); }
00266 
00267 TMPL static void col_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) {
00268   ASSERT (i<c && j<c, "out of range");
00269   col_binary_scalar<mul_sub_op> (s, sc, i, j, r, r, c); }
00270 TMPL static void row_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) {
00271   ASSERT (i<r && j<r, "out of range");
00272   row_binary_scalar<mul_sub_op> (s, sc, i, j, c, r, c); }
00273 TMPL static void col_sweep (C* s, nat i, nat j, nat r, const C& sc,
00274                             nat rr, nat cc) {
00275   col_binary_scalar<mul_sub_op> (s, sc, i, j, r, rr, cc); }
00276 TMPL static void row_sweep (C* s, nat i, nat j, nat c, const C& sc,
00277                             nat rr, nat cc) {
00278   row_binary_scalar<mul_sub_op> (s, sc, i, j, c, rr, cc); }
00279 
00280 TMPL static void col_swap (C* s, nat i, nat j, nat r, nat c) {
00281   ASSERT (i<c && j<c, "out of range");
00282   col_binary_combine<swap_op> (s, i, j, r, r, c); }
00283 TMPL static void row_swap (C* s, nat i, nat j, nat r, nat c) {
00284   ASSERT (i<r && j<r, "out of range");
00285   row_binary_combine<swap_op> (s, i, j, c, r, c); }
00286 TMPL static void col_swap (C* s, nat i, nat j, nat r, nat rr, nat cc) {
00287   col_binary_combine<swap_op> (s, i, j, r, rr, cc); }
00288 TMPL static void row_swap (C* s, nat i, nat j, nat c, nat rr, nat cc) {
00289   row_binary_combine<swap_op> (s, i, j, c, rr, cc); }
00290 
00291 TMPL static void
00292 row_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) {
00293   ASSERT (i<r && j<r, "out of range");
00294   C* iti= s + Mat::index (i, 0, r, c);
00295   C* itj= s + Mat::index (j, 0, r, c);
00296   nat plus= Mat::index (0, 1, r, c);
00297   while (c>0) {
00298     *iti= si * (*iti) - sj * (*itj);
00299     iti += plus; itj += plus; c--; } }
00300 
00301 TMPL static void
00302 col_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) {
00303   ASSERT (i<c && j<c, "out of range");
00304   C* iti= s + Mat::index (0, i, r, c);
00305   C* itj= s + Mat::index (0, j, r, c);
00306   nat plus= Mat::index (1, 0, r, c);
00307   while (r>0) {
00308     *iti= si * (*iti) - sj * (*itj);
00309     iti += plus; itj += plus; r--; } }
00310 
00311 TMPL static bool
00312 row_is_zero (const C* s, nat i, nat r, nat c) {
00313   if (r == 0 || c == 0) return true;
00314   return Vec::template vec_binary_test_scalar_stride<equal_op>
00315     (s + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c), promote (0, *s), c); }
00316 
00317 TMPL static bool
00318 col_is_zero (const C* s, nat j, nat r, nat c) {
00319   if (r == 0 || c == 0) return true;
00320   return Vec::template vec_binary_test_scalar_stride<equal_op>
00321     (s + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), promote (0, *s), r); }
00322 
00323 }; 
00324 
00325 
00326 
00327 
00328 
00329 template<typename V>
00330 struct matrix_multiply_threshold {};
00331 
00332 struct matrix_multiply_base {};
00333 struct matrix_multiply {};
00334 
00335 template<typename V>
00336 struct implementation<matrix_multiply_base,V,matrix_naive>:
00337   public implementation<matrix_linear,V>
00338 {
00339   typedef implementation<matrix_linear,V> Mat;
00340 
00341 template<typename Op, typename C>
00342 struct clear_helper {
00343   static inline void op (C* d, nat r, nat rr, nat c, nat cc) {
00344     Mat::clear_range (d, 0, 0, r, c, rr, cc); }
00345 };
00346 
00347 template<typename C>
00348 struct clear_helper<mul_add_op,C> {
00349   static inline void op (C* d, nat r, nat rr, nat c, nat cc) {}
00350 };
00351 
00352 template<typename Op, typename C> static inline void
00353 clr (C* d, nat r, nat rr, nat c, nat cc) {
00354   clear_helper<Op,C>::op (d, r, rr, c, cc);
00355 }
00356 
00357 template<typename Op, typename D, typename S1, typename S2>
00358 static inline void
00359 mul (D* d, const S1* s1, const S2* s2,
00360      nat r, nat rr, nat l, nat ll, nat c, nat cc) {
00361   typedef typename Op::acc_op Acc;
00362   if (l == 0) clr<Op> (d, r, rr, c, cc);
00363   else {
00364     nat ri, ci, li;
00365     D *dr, *dc;
00366     const S1 *s1r, *s1c;
00367     const S2 *s2r, *s2c;
00368     nat drp = Mat::index (1, 0, rr, cc), dcp = Mat::index (0, 1, rr, cc);
00369     nat s1rp= Mat::index (1, 0, rr, ll), s1cp= Mat::index (0, 1, rr, ll);
00370     nat s2rp= Mat::index (1, 0, ll, cc), s2cp= Mat::index (0, 1, ll, cc);
00371     for (ri= r, dr= d, s1r= s1; ri!=0; ri--, dr += drp, s1r += s1rp)
00372       for (ci= c, dc= dr, s2c= s2; ci!=0; ci--, dc += dcp, s2c += s2cp) {
00373         D tmp= *dc;
00374         for (li= l, s1c= s1r, s2r= s2c; li==l; li--, s1c += s1cp, s2r += s2rp)
00375           Op ::set_op (tmp, *s1c, *s2r);
00376         for (                         ; li!=0; li--, s1c += s1cp, s2r += s2rp)
00377           Acc::set_op (tmp, *s1c, *s2r);
00378         *dc= tmp;
00379       }
00380   }
00381 }
00382 
00383 }; 
00384 
00385 template<typename V>
00386 struct implementation<matrix_multiply,V,matrix_naive>:
00387   public implementation<matrix_multiply_base,V>
00388 {
00389   typedef implementation<matrix_multiply_base,V> Mat;
00390 
00391 template<typename Op, typename D, typename S1, typename S2>
00392 static inline void
00393 mul (D* d, const S1* s1, const S2* s2,
00394      nat r, nat rr, nat l, nat ll, nat c, nat cc) {
00395   Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00396 }
00397 
00398 template<typename D, typename S1, typename S2> static inline void
00399 mul (D* dest, const S1* m1, const S2* m2, nat r, nat l, nat c) {
00400   Mat::template mul<mul_op> (dest, m1, m2, r, r, l, l, c, c);
00401 }
00402 
00403 }; 
00404 
00405 
00406 
00407 
00408 
00409 struct matrix_iterate {};
00410 
00411 template<typename V>
00412 struct implementation<matrix_iterate,V,matrix_naive>:
00413   public implementation<matrix_multiply,V>
00414 {
00415   typedef implementation<matrix_multiply,V> Mat;
00416 
00417 TMPL static inline void
00418 iterate_mul (C** d, const C* s, const C* x,
00419              nat rs, nat cs, nat rx, nat cx, nat n) {
00420   
00421   
00422   VERIFY (cs == rx && rs == cs, "sizes do not match");
00423   if (rs == 0) return;
00424   Mat::get_range (d[0], x, 0, 0, rx, cx, rx, cx);
00425   for (nat i = 1; i < n; i++)
00426     Mat::mul (d[i], s, d[i-1], rs, cs, cx);
00427 }
00428 
00429 TMPL static inline void
00430 project_iterate_mul (C** d, const C* y, const C* s, const C* x,
00431                      nat ry, nat cy, nat rs, nat cs, nat rx, nat cx, nat n) {
00432   
00433   VERIFY (cy == rs && rs == cs && cs == rx, "sizes do not match");
00434   if (n == 0) return;
00435   nat l= aligned_size<C,V> (rx * cx);
00436   C* buf= mmx_new<C> (l << 1); 
00437   C* t1= buf, * t2= t1 + l;
00438   Mat::get_range (t1, x, 0, 0, rx, cx, rx, cx);
00439   Mat::mul (d[0], y, t1, ry, cy, cx);
00440   for (nat i = 1; i < n; i++) {
00441     Mat::mul (t2, s, t1, rs, cs, cx);
00442     Mat::mul (d[i], y, t2, ry, cy, cx);
00443     swap (t1, t2);
00444   }
00445   mmx_delete<C> (buf, l << 1);
00446 }
00447 
00448 }; 
00449 
00450 
00451 
00452 
00453 
00454 struct matrix_permute {};
00455 
00456 template<typename V>
00457 struct implementation<matrix_permute,V,matrix_naive>:
00458   public implementation<matrix_multiply,V>
00459 {
00460   typedef implementation<matrix_multiply,V> Mat;
00461   typedef implementation<vector_linear,V> Vec;
00462 
00463   TMPL static void
00464   col_permute (C* dest, const C* m, const nat* p, nat r, nat c) {
00465     
00466     nat rs= Mat::index (1, 0, r, c);
00467     for (nat i=0; i<c; i++)
00468       Vec::template vec_unary_stride<id_op>
00469         (dest + Mat::index (0, i, r, c), rs,
00470          m + Mat::index (0, p[i], r, c), rs, r);
00471   }
00472 
00473   TMPL static void
00474   row_permute (C* dest, const C* m, const nat* p, nat r, nat c) {
00475     
00476     nat cs= Mat::index (0, 1, r, c);
00477     for (nat i=0; i<r; i++)
00478       Vec::template vec_unary_stride<id_op>
00479         (dest + Mat::index (i, 0, r, c), cs,
00480          m + Mat::index (p[i], 0, r, c), cs, c);
00481   }
00482 
00483   TMPL static void
00484   col_permute (C* m, const nat* p, nat r, nat c, bool inv= false) {
00485     
00486     
00487     nat q[c];
00488     if (inv) for (nat i=0; i<c; i++) q[i]= p[i];
00489     else for (nat i=0; i<c; i++) q[p[i]]= i;
00490     for (nat i=0; i<c; ) {
00491       if (q[i] == i) i++;
00492       else {
00493         nat j= q[i], k= q[j];
00494         Mat::col_swap (m, i, j, r, c);
00495         q[i]= k; q[j]= j;
00496       }
00497     }
00498   }
00499 
00500   TMPL static void
00501   row_permute (C* m, const nat* p, nat r, nat c, bool inv= false) {
00502     
00503     
00504     nat q[r];
00505     if (inv) for (nat i=0; i<c; i++) q[i]= p[i];
00506     else for (nat i=0; i<c; i++) q[p[i]]= i;
00507     for (nat i=0; i<r; ) {
00508       if (q[i] == i) i++;
00509       else {
00510         nat j= q[i], k= q[j];
00511         Mat::row_swap (m, i, q[i], r, c);
00512         q[i]= k; q[j]= j;
00513       }
00514     }
00515   }
00516 
00517 }; 
00518 
00519 
00520 
00521 
00522 
00523 template<typename C>
00524 struct pivot_helper {
00525   static inline bool
00526   better (const C& x1, const C& x2) { return false; }
00527 };
00528 
00529 template<typename C> inline bool
00530 better_pivot (const C& x1, const C& x2) {
00531   return pivot_helper<C>::better (x1, x2);
00532 }
00533 
00534 
00535 
00536 
00537 
00538 struct matrix_echelon {};
00539 
00540 template<typename V>
00541 struct implementation<matrix_echelon,V,matrix_naive>:
00542   public implementation<matrix_multiply,V>
00543 {
00544   typedef implementation<matrix_multiply,V> Mat;
00545 
00546 private:
00547 
00548 TMPL static void
00549 col_echelon (C* m, C* k, nat ri, nat ci, nat rf, nat cf, nat rm, nat cm, 
00550              C& num, C& den, bool reduced, nat* permut)
00551 {
00552   
00553   
00554   
00555   
00556   
00557   
00558   
00559   
00560   
00561   
00562   VERIFY ((ri < rm) && (ci < cm) && (rf <= rm) && (cf <= cm), "out of range");
00563   format<C> fm= get_format (m[0]);
00564   num= promote (1, fm); den= promote (1, fm);
00565   nat cb= ci;
00566   if (permut != NULL) for (nat i= 0; i < cm; i++) permut[i]= i;
00567   while (ri < rf && ci < cf) {
00568     
00569     nat i= ri, j= ci;
00570     nat best_index;
00571     C   best_val= promote (0, fm);
00572     for (i= ri; i<rf; i++) {
00573       best_index= cf;
00574       for (j= ci; j<cf; j++) {
00575         C next_val= Mat::entry (m, i, j, rm, cm);
00576         if (next_val != promote (0, fm))
00577           if (best_index == cf || better_pivot (next_val, best_val)) {
00578             best_index= j;
00579             best_val= next_val;
00580           }
00581       }
00582       if (best_index != cf) { j= best_index; break; }
00583     }
00584     
00585     if (i<rf && j<cf) {
00586       
00587       if (j != ci) {
00588         if (k != NULL) Mat::col_swap (k, ci, j, cm, cm);
00589         Mat::col_swap (m, ci, j, rm, cm);
00590         den= -den;
00591         if (permut != NULL) swap (permut[j], permut[ci]); 
00592       }
00593       ri= i;
00594       
00595       C p= Mat::entry (m, ri, ci, rm, cm);
00596       num *= p;
00597       for (nat index= reduced ? cb : ci + 1; index < cf; index++) {
00598         if (index == ci) continue;
00599         C t= Mat::entry (m, ri, index, rm, cm);
00600         if (k != NULL) Mat::col_sweep (k, index, ci, t/p, cm, cm);
00601         Mat::col_sweep (m, index, ci, t/p, rm, cm);
00602       }
00603       ri++; ci++;
00604     }
00605     else  break;
00606   }
00607 }
00608 
00609 public:
00610 
00611 TMPL static inline void
00612 col_echelon (C* m, C* k, nat rm, nat cm, C& num, C& den,
00613              bool reduced= false, nat* permut= NULL) {
00614   if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm);
00615   col_echelon (m, k, 0, 0, rm, cm, rm, cm, num, den, reduced, permut);
00616 }
00617 
00618 TMPL static inline void
00619 col_echelon (C* m, C* k, nat rm, nat cm,
00620              bool reduced= false, nat* permut= NULL) {
00621   if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm);
00622   C num, den;
00623   col_echelon (m, k, rm, cm, num, den, reduced, permut);
00624 }
00625 
00626 }; 
00627 
00628 
00629 
00630 
00631 
00632 struct matrix_determinant {};
00633 
00634 template<typename V>
00635 struct implementation<matrix_determinant,V,matrix_naive>:
00636   public implementation<matrix_echelon,V>
00637 {
00638   typedef implementation<matrix_echelon,V> Mat;
00639 
00640 TMPL static void
00641 det (C& r, const C* m, nat n) {
00642   if (n == 0) { set_as (r, 1); return; }
00643   nat len_c= aligned_size<C,V> (n * n);
00644   C* c= mmx_new<C> (len_c), * k= NULL;
00645   C num, den;
00646   Mat::copy (c, m, n, n);
00647   Mat::col_echelon (c, k, n, n, num, den);
00648   if (Mat::col_is_zero (c, n-1, n, n)) set_as (r, 0);
00649   else r= num / den;
00650   mmx_delete<C> (c, len_c);
00651 }
00652 
00653 }; 
00654 
00655 
00656 
00657 
00658 
00659 struct matrix_kernel {};
00660 
00661 template<typename V>
00662 struct implementation<matrix_kernel,V,matrix_naive>:
00663   public implementation<matrix_echelon,V>
00664 {
00665   typedef implementation<matrix_echelon,V> Mat;
00666 
00667 TMPL static nat
00668 kernel (C* k, const C* m, nat rm, nat cm) {
00669   
00670   VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00671   nat i, j, dim, len_c= aligned_size<C,V> (rm * cm);
00672   C* c= mmx_new<C> (len_c);
00673   Mat::copy (c, m, rm, cm);
00674   Mat::col_echelon (c, k, rm, cm);
00675   for (i=0; i<cm; i++)
00676     if (Mat::col_is_zero (c, i, rm, cm)) break;
00677   mmx_delete<C> (c, len_c);
00678   dim= cm - i;
00679   if (dim < cm)
00680     for (j=0; j < dim; j++)
00681       Mat::col_swap (k, j, i+j, cm, cm);
00682     
00683   C x;
00684   for (j=0; j < dim; j++)
00685     for (i=0; i < cm; i++) {
00686       x= *(k + Mat::index (i, j, cm, cm));
00687       if (x != promote (0, x))
00688         Mat::col_div (k, x, j, cm, cm);
00689     }
00690   return dim;
00691 }
00692 
00693 }; 
00694 
00695 
00696 
00697 
00698 
00699 struct matrix_image {};
00700 
00701 template<typename V>
00702 struct implementation<matrix_image,V,matrix_naive>:
00703   public implementation<matrix_echelon,V>
00704 {
00705   typedef implementation<matrix_echelon,V> Mat;
00706 
00707 TMPL static nat
00708 image (C* k, const C* m, nat rm, nat cm) {
00709   
00710   VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00711   nat i, len_c= aligned_size<C,V> (rm * cm);
00712   C* c= mmx_new<C> (len_c);
00713   Mat::copy (c, m, rm, cm);
00714   Mat::col_echelon (c, (C*) NULL, rm, cm);
00715   for (i=0; i<cm; i++)
00716     if (Mat::col_is_zero (c, i, rm, cm)) break;
00717   Mat::get_range (k, c, 0, 0, rm, i, rm, cm);
00718   
00719   mmx_delete<C> (c, len_c);
00720   return i;
00721 }
00722 
00723 TMPL static nat
00724 rank (const C* m, nat rm, nat cm) {
00725   VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00726   nat i, len_c= aligned_size<C,V> (rm * cm);
00727   C* c= mmx_new<C> (len_c);
00728   Mat::copy (c, m, rm, cm);
00729   Mat::col_echelon (c, (C*) NULL, rm, cm);
00730   for (i=0; i<cm; i++)
00731     if (Mat::col_is_zero (c, i, rm, cm)) break;
00732   mmx_delete<C> (c, len_c);
00733   return i;
00734 }
00735 
00736 }; 
00737 
00738 
00739 
00740 
00741 
00742 struct matrix_invert {};
00743 
00744 template<typename V>
00745 struct implementation<matrix_invert,V,matrix_naive>:
00746   public implementation<matrix_echelon,V>
00747 {
00748   typedef implementation<matrix_echelon,V> Mat;
00749 
00750 TMPL static void
00751 invert_lower_triangular (C* inv, const C* m, nat n) {
00752   for (nat i=0; i<n; i++) {
00753     C d= Mat::entry (m, i, i, n, n);
00754     ASSERT (d != promote (0, d), "non-invertible matrix");
00755     for (nat k=0; k<=i; k++) {
00756       C sum= promote (0, d);
00757       for (nat j=k; j<i; j++)
00758         sum -= Mat::entry (m, i, j, n, n) * Mat::entry (inv, j, k, n, n);
00759       if (i == k) sum += promote (1, d);
00760       inv[Mat::index (i, k, n, n)]= sum / d;
00761     }
00762     for (nat k=i+1; k<n; k++)
00763       inv[Mat::index (i, k, n, n)]= promote (0, d);
00764   }
00765 }
00766 
00767 TMPL static void
00768 invert (C* k, const C* m, nat n) {
00769   nat l= aligned_size<C,V> (n * n);
00770   C* a= mmx_new<C> (l << 1);
00771   C* b= a + l;
00772   Mat::copy (k, m, n, n);
00773   Mat::col_echelon (k, b, n, n); 
00774   
00775   
00776   invert_lower_triangular (a, k, n);
00777   
00778   Mat::mul (k, b, a, n, n, n);
00779   mmx_delete<C> (a, l << 1);
00780 }
00781 
00782 }; 
00783 
00784 
00785 
00786 
00787 
00788 struct matrix_orthogonalization {};
00789 
00790 template<typename V>
00791 struct implementation<matrix_orthogonalization,V,matrix_naive>:
00792   public implementation<matrix_multiply,V>
00793 {
00794   typedef implementation<matrix_linear,V> Mat;
00795   typedef implementation<vector_linear,V> Vec;
00796 
00797 TMPL static inline C
00798 row_inn_prod (C* m, nat i, nat j, nat r, nat c) {
00799   
00800   return Vec::template vec_binary_big_stride<mul_add_op> (
00801     m + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c),
00802     m + Mat::index (j, 0, r, c), Mat::index (0, 1, r, c), c);
00803 }
00804 
00805 TMPL static inline C
00806 col_inn_prod (C* m, nat i, nat j, nat r, nat c) {
00807   
00808   return Vec::template vec_binary_big_stride<mul_add_op> (
00809     m + Mat::index (0, i, r, c), Mat::index (1, 0, r, c),
00810     m + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), r);
00811 }
00812 
00813 TMPL static void
00814 row_orthogonalize (C* m, nat r, nat c, C* n) {
00815 
00816 
00817 
00818   for (nat i= 0; i < r; i++) {
00819     for (nat j= 0; j < i; j++) {
00820       if (n[j] == 0) continue;
00821       C mu= row_inn_prod (m, i, j, r, c) / n[j];
00822       Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
00823     }
00824     n[i]= row_inn_prod (m, i, i, r, c);
00825   }
00826 }
00827 
00828 TMPL static void
00829 col_orthogonalize (C* m, nat r, nat c, C* n) {
00830 
00831   for (nat i= 0; i < c; i++) {
00832     for (nat j= 0; j < i; j++) {
00833       if (n[j] == 0) continue;
00834       C mu= col_inn_prod (m, i, j, r, c) / n[j];
00835       Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
00836     }
00837     n[i]= col_inn_prod (m, i, i, r, c);
00838   }
00839 }
00840 
00841 TMPL static void
00842 row_orthogonalize (C* m, nat r, nat c, C* l, C* n) {
00843 
00844 
00845 
00846 
00847   for (nat i= 0; i < r; i++) {
00848     for (nat j= 0; j < i; j++) {
00849       if (n[j] == 0) continue;
00850       C mu= row_inn_prod (m, i, j, r, c) / n[j];
00851       Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
00852       *(l + Mat::index (i, j, r, r))= mu;
00853     }
00854     n[i]= row_inn_prod (m, i, i, r, c);
00855     *(l + Mat::index (i, i, r, r))= C(1);
00856   }
00857 }
00858 
00859 TMPL static void
00860 col_orthogonalize (C* m, nat r, nat c, C* l, C* n) {
00861 
00862   for (nat i= 0; i < c; i++) {
00863     for (nat j= 0; j < i; j++) {
00864       if (n[j] == 0) continue;
00865       C mu= col_inn_prod (m, i, j, r, c) / n[j];
00866       Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
00867       *(l + Mat::index (j, i, c, c))= mu;
00868     }
00869     n[i]= col_inn_prod (m, i, i, r, c);
00870     *(l + Mat::index (i, i, c, c))= C(1);
00871   }
00872 }
00873 
00874 TMPL static void
00875 row_orthonormalize (C* m, nat r, nat c) {
00876 
00877   for (nat i= 0; i < r; i++) {
00878     for (nat j= 0; j < i; j++)
00879       Mat::row_combine_sub (m, i, C(1), j, row_inn_prod (m, i, j, r, c), r, c);
00880     C a= row_inn_prod (m, i, i, r, c);
00881     if (a != 0) Mat::row_div (m, sqrt (a), i, r, c);
00882   }
00883 }
00884 
00885 TMPL static void
00886 col_orthonormalize (C* m, nat r, nat c) {
00887 
00888   for (nat i= 0; i < c; i++) {
00889     for (nat j= 0; j < i; j++)
00890       Mat::col_combine_sub (m, i, C(1), j, col_inn_prod (m, i, j, r, c), r, c);
00891     C a= col_inn_prod (m, i, i, r, c);
00892     if (a != 0) Mat::col_div (m, sqrt (a), i, r, c);
00893   }
00894 }
00895 
00896 TMPL static void
00897 row_orthonormalize (C* m, nat r, nat c, C* l) {
00898 
00899 
00900 
00901   for (nat i= 0; i < r; i++) {
00902     for (nat j= 0; j < i; j++) {
00903       C mu= row_inn_prod (m, i, j, r, c);
00904       Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
00905       *(l + Mat::index (i, j, r, r))= mu;
00906     }
00907     C a= row_inn_prod (m, i, i, r, c);
00908     if (a != 0) {
00909       C b= sqrt (a);
00910       Mat::row_div (m, b, i, r, c);
00911       *(l + Mat::index (i, i, r, r))= b;
00912     }
00913   }
00914 }
00915 
00916 TMPL static void
00917 col_orthonormalize (C* m, nat r, nat c, C* l) {
00918 
00919   for (nat i= 0; i < c; i++) {
00920     for (nat j= 0; j < i; j++) {
00921       C mu= col_inn_prod (m, i, j, r, c);
00922       Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
00923       *(l + Mat::index (j, i, c, c))= mu;
00924     }
00925     C a= col_inn_prod (m, i, i, r, c);
00926     if (a != 0) {
00927       C b= sqrt (a);
00928       Mat::col_div (m, b, i, r, c);
00929       *(l + Mat::index (i, i, c, c))= b;
00930     }
00931   }
00932 }
00933 
00934 }; 
00935 
00936 #undef TMPL
00937 }
00938 #endif //__MMX__MATRIX_NAIVE__HPP