00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef __MMX__MATRIX_NAIVE__HPP
00014 #define __MMX__MATRIX_NAIVE__HPP
00015 #include <basix/vector.hpp>
00016
00017 namespace mmx {
00018 #define TMPL template<typename C>
00019
00020
00021
00022
00023
00024 struct matrix_naive {
00025 typedef vector_naive Vec;
00026 typedef matrix_naive Naive;
00027 typedef matrix_naive Positive;
00028 typedef matrix_naive No_aligned;
00029 typedef matrix_naive No_simd;
00030 typedef matrix_naive No_thread;
00031 typedef matrix_naive No_scaled;
00032 };
00033
00034 template<typename C>
00035 struct matrix_variant_helper {
00036 typedef matrix_naive MV;
00037 };
00038
00039
00040
00041
00042
//! Tag for the variant-dependent default dimensions of matrices.
struct matrix_defaults {};

template<typename V>
struct implementation<matrix_defaults,V,matrix_naive> {
  // Dimensions of a default-constructed matrix.
  static const nat def_rows = 0;
  static const nat def_cols = 0;
  // Initial dimensions used when allocating — presumably the minimal
  // allocation granularity; confirm against the matrix container code.
  static const nat init_rows= 1;
  static const nat init_cols= 1;
};
00052
00053
00054
00055
00056
// The vector-level implementation layers are inherited unchanged from
// the vector variant V::Vec associated with this matrix variant: the
// naive matrix variant adds nothing at the vector level.
template<typename V>
struct implementation<vector_defaults,V,matrix_naive>:
  public implementation<vector_defaults,V,typename V::Vec> {};

template<typename V>
struct implementation<vector_allocate,V,matrix_naive>:
  public implementation<vector_allocate,V,typename V::Vec> {};

template<typename V>
struct implementation<vector_abstractions,V,matrix_naive>:
  public implementation<vector_abstractions,V,typename V::Vec> {};

template<typename V>
struct implementation<vector_abstractions_stride,V,matrix_naive>:
  public implementation<vector_abstractions_stride,V,typename V::Vec> {};

template<typename V>
struct implementation<vector_linear,V,matrix_naive>:
  public implementation<vector_linear,V,typename V::Vec> {};
00076
00077 template<typename C, typename V> inline nat
00078 aligned_size (nat r, nat c) {
00079 return aligned_size<C,V> (r * c); }
00080
00081 template<typename C> inline nat
00082 default_aligned_size (nat r, nat c) {
00083 return default_aligned_size<C> (r * c); }
00084
00085
00086
00087
00088
//! Tag for the vectorial layer: storage layout, element access,
//! copying, and whole-matrix operations.
struct matrix_vectorial {};

template<typename V>
struct implementation<matrix_vectorial,V,matrix_naive>:
  public implementation<matrix_defaults,V>
{
  typedef implementation<vector_linear,V> Vec;

  //! Flat-storage position of entry (row, col).  Storage is
  //! column-major: entry (row, col) lives at col * rows + row, so the
  //! cols argument is unused here (kept for interface uniformity).
  static inline nat
  index (nat row, nat col, nat rows, nat cols) {
    (void) cols;
    return col * rows + row; }

  //! Read-only access to entry (row, col) of the rows x cols matrix m.
  TMPL
  static inline const C&
  entry (const C* m, nat row, nat col, nat rows, nat cols) {
    return * (m + index (row, col, rows, cols)); }

  //! Applies Op column by column: for each of the cols columns, a
  //! strided vector operation over rows elements.  *_rs is the stride
  //! between consecutive row entries, *_cs between consecutive columns.
  template<typename Op, typename T, typename C> static inline void
  mat_unary_stride (T* dest, nat dest_rs, nat dest_cs,
                    const C* src, nat src_rs, nat src_cs,
                    nat rows, nat cols) {
    for (; cols != 0; dest += dest_cs, src += src_cs, cols--)
      Vec::template vec_unary_stride<Op> (dest, dest_rs, src, src_rs, rows); }

  //! Binary counterpart of mat_unary_stride (two sources, one dest).
  template<typename Op, typename T, typename C1, typename C2> static inline void
  mat_binary_stride (T* dest, nat dest_rs, nat dest_cs,
                     const C1* src1, nat src1_rs, nat src1_cs,
                     const C2* src2, nat src2_rs, nat src2_cs,
                     nat rows, nat cols) {
    for (; cols != 0; dest += dest_cs, src1 += src1_cs, src2 += src2_cs, cols--)
      Vec::template vec_binary_stride<Op>
        (dest, dest_rs, src1, src1_rs, src2, src2_rs, rows); }

  //! Zeroes all r * c entries (contiguous storage).
  TMPL static inline void
  clear (C* dest, nat r, nat c) {
    Vec::clear (dest, r * c);
  }

  //! Sets dest to c times the identity: clears everything, then writes
  //! c on the main diagonal (index (1, 1, ...) steps one row and one
  //! column at a time).
  TMPL static void
  set (C* dest, const C& c, nat rows, nat cols) {
    clear (dest, rows, cols);
    nat m= min (rows, cols), plus= index (1, 1, rows, cols);
    while (m != 0) {
      *dest= c;
      dest += plus; m--; } }

  //! Copies the sub-block rows [r1, r2) x cols [c1, c2) of the r x c
  //! matrix s into the (r2-r1) x (c2-c1) matrix d.
  TMPL static void
  get_range (C* d, const C* s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
    nat rr= r2 - r1, cc = c2 - c1;
    mat_unary_stride<id_op>
      (d, index (1, 0, rr, cc), index (0, 1, rr, cc),
       s + index (r1, c1, r, c), index (1, 0, r, c), index (0, 1, r, c),
       rr, cc);
  }

  //! Zeroes the sub-block [r1, r2) x [c1, c2) of the r x c matrix d,
  //! in place.
  TMPL static void
  clear_range (C* d, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
    d += index (r1, c1, r, c);
    C* dd= d;
    nat rp= index (1, 0, r, c), cp= index (0, 1, r, c);
    for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd)
      for (nat i=0; i<(r2-r1); i++, d += rp)
        clear_op::set_op (*d);
  }

  //! Writes s on the diagonal of the sub-block [r1, r2) x [c1, c2)
  //! (diagonal relative to the block's top-left corner) and zeroes the
  //! remaining block entries.
  TMPL static void
  set_range (C* d, const C& s, nat r1, nat c1, nat r2, nat c2, nat r, nat c) {
    d += index (r1, c1, r, c);
    C* dd= d;
    nat rp= index (1, 0, r, c), cp= index (0, 1, r, c);
    for (nat j=0; j<(c2-c1); j++, dd += cp, d = dd)
      for (nat i=0; i<(r2-r1); i++, d += rp)
        if (i == j) *d = s;
        else clear_op::set_op (*d);
  }

  //! Copies the full r x c matrix s into dest (contiguous storage).
  TMPL static void
  copy (C* dest, const C* s, nat r, nat c) {
    Vec::copy (dest, s, r * c); }

  //! dest (cols x rows) := transpose of src (rows x cols), realized by
  //! swapping the row/column strides of the destination.
  TMPL static void
  transpose (C* dest, const C* src, nat rows, nat cols) {
    mat_unary_stride<id_op>
      (dest, index (0, 1, cols, rows), index (1, 0, cols, rows),
       src, index (1, 0, rows, cols), index (0, 1, rows, cols),
       rows, cols); }

  //! Prints the matrix as "[ a, b; c, d ]" with rows separated by ';'.
  TMPL static inline void
  print (const port& out, const C* s, nat rows, nat cols) {
    out << "[ ";
    for (nat i=0; i < rows; i++) {
      for (nat j=0; j < cols; j++) {
        out << * (s + index (i, j, rows, cols));
        if (j + 1 != cols) out << ", ";
      }
      if (i + 1 != rows) out << ";" << lf;
    }
    out << " ]";
  }

};
00198
00199
00200
00201
00202
//! Tag for the linear-algebra layer: elementary row and column
//! operations (scale, swap, sweep, combine, zero tests).
struct matrix_linear {};

template<typename V>
struct implementation<matrix_linear,V,matrix_naive>:
  public implementation<matrix_vectorial,V>
{
  typedef implementation<matrix_vectorial,V> Mat;
  typedef implementation<vector_linear,V> Vec;

  //! Applies Op with scalar x to the r leading entries of column c1 of
  //! an rr x cc matrix.
  template<typename Op, typename C, typename X> static inline void
  col_unary_scalar (C* dest, const X& x, nat c1, nat r, nat rr, nat cc) {
    Vec::template vec_unary_scalar_stride<Op>
      (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc), x, r);
  }

  //! Applies Op with scalar x to column c1 (destination) and column c2
  //! (source) over their r leading entries.
  template<typename Op, typename C, typename X> static inline void
  col_binary_scalar (C* dest, const X& x, nat c1, nat c2, nat r,
                     nat rr, nat cc) {
    Vec::template vec_binary_scalar_stride<Op>
      (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc),
       dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), x, r);
  }

  //! Combines columns c1 and c2 entry-wise through Op (e.g. swap_op).
  template<typename Op, typename C> static inline void
  col_binary_combine (C* dest, nat c1, nat c2, nat r, nat rr, nat cc) {
    Vec::template vec_binary_combine_stride<Op>
      (dest + Mat::index (0, c1, rr, cc), Mat::index (1, 0, rr, cc),
       dest + Mat::index (0, c2, rr, cc), Mat::index (1, 0, rr, cc), r);
  }

  //! Row analogue of col_unary_scalar (c leading entries of row r1).
  template<typename Op, typename C, typename X> static inline void
  row_unary_scalar (C* dest, const X& x, nat r1, nat c, nat rr, nat cc) {
    Vec::template vec_unary_scalar_stride<Op>
      (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c);
  }

  //! Row analogue of col_binary_scalar.
  template<typename Op, typename C, typename X> static inline void
  row_binary_scalar (C* dest, const X& x, nat r1, nat r2, nat c,
                     nat rr, nat cc) {
    Vec::template vec_binary_scalar_stride<Op>
      (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc),
       dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), x, c);
  }

  //! Row analogue of col_binary_combine.
  template<typename Op, typename C> static inline void
  row_binary_combine (C* dest, nat r1, nat r2, nat c, nat rr, nat cc) {
    Vec::template vec_binary_combine_stride<Op>
      (dest + Mat::index (r1, 0, rr, cc), Mat::index (0, 1, rr, cc),
       dest + Mat::index (r2, 0, rr, cc), Mat::index (0, 1, rr, cc), c);
  }

  //! Multiplies / divides column (resp. row) i of the r x c matrix s
  //! by the scalar sc (rmul_op / rdiv_op apply the scalar on the right).
  TMPL static void col_mul (C* s, const C& sc, nat i, nat r, nat c) {
    ASSERT (i<c, "out of range");
    col_unary_scalar<rmul_op> (s, sc, i, r, r, c); }
  TMPL static void col_div (C* s, const C& sc, nat i, nat r, nat c) {
    ASSERT (i<c, "out of range");
    col_unary_scalar<rdiv_op> (s, sc, i, r, r, c); }
  TMPL static void row_mul (C* s, const C& sc, nat i, nat r, nat c) {
    ASSERT (i<r, "out of range");
    row_unary_scalar<rmul_op> (s, sc, i, c, r, c); }
  TMPL static void row_div (C* s, const C& sc, nat i, nat r, nat c) {
    ASSERT (i<r, "out of range");
    row_unary_scalar<rdiv_op> (s, sc, i, c, r, c); }

  //! Sweeps: column/row i is swept by sc times column/row j via
  //! mul_sub_op (as used for pivot elimination in col_echelon, where
  //! it zeroes the pivot-row entry of column i).
  TMPL static void col_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) {
    ASSERT (i<c && j<c, "out of range");
    col_binary_scalar<mul_sub_op> (s, sc, i, j, r, r, c); }
  TMPL static void row_sweep (C* s, nat i, nat j, const C& sc, nat r, nat c) {
    ASSERT (i<r && j<r, "out of range");
    row_binary_scalar<mul_sub_op> (s, sc, i, j, c, r, c); }
  //! Variants acting on the r (resp. c) leading entries only, for a
  //! matrix with allocated dimensions rr x cc.
  TMPL static void col_sweep (C* s, nat i, nat j, nat r, const C& sc,
                              nat rr, nat cc) {
    col_binary_scalar<mul_sub_op> (s, sc, i, j, r, rr, cc); }
  TMPL static void row_sweep (C* s, nat i, nat j, nat c, const C& sc,
                              nat rr, nat cc) {
    row_binary_scalar<mul_sub_op> (s, sc, i, j, c, rr, cc); }

  //! Swaps columns (resp. rows) i and j.
  TMPL static void col_swap (C* s, nat i, nat j, nat r, nat c) {
    ASSERT (i<c && j<c, "out of range");
    col_binary_combine<swap_op> (s, i, j, r, r, c); }
  TMPL static void row_swap (C* s, nat i, nat j, nat r, nat c) {
    ASSERT (i<r && j<r, "out of range");
    row_binary_combine<swap_op> (s, i, j, c, r, c); }
  TMPL static void col_swap (C* s, nat i, nat j, nat r, nat rr, nat cc) {
    col_binary_combine<swap_op> (s, i, j, r, rr, cc); }
  TMPL static void row_swap (C* s, nat i, nat j, nat c, nat rr, nat cc) {
    row_binary_combine<swap_op> (s, i, j, c, rr, cc); }

  //! Row i := si * row i - sj * row j.
  TMPL static void
  row_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) {
    ASSERT (i<r && j<r, "out of range");
    C* iti= s + Mat::index (i, 0, r, c);
    C* itj= s + Mat::index (j, 0, r, c);
    nat plus= Mat::index (0, 1, r, c);
    while (c>0) {
      *iti= si * (*iti) - sj * (*itj);
      iti += plus; itj += plus; c--; } }

  //! Column i := si * column i - sj * column j.
  TMPL static void
  col_combine_sub (C* s, nat i, const C& si, nat j, const C& sj, nat r, nat c) {
    ASSERT (i<c && j<c, "out of range");
    C* iti= s + Mat::index (0, i, r, c);
    C* itj= s + Mat::index (0, j, r, c);
    nat plus= Mat::index (1, 0, r, c);
    while (r>0) {
      *iti= si * (*iti) - sj * (*itj);
      iti += plus; itj += plus; r--; } }

  //! Whether row i consists entirely of zeros; an empty matrix counts
  //! as zero.
  TMPL static bool
  row_is_zero (const C* s, nat i, nat r, nat c) {
    if (r == 0 || c == 0) return true;
    return Vec::template vec_binary_test_scalar_stride<equal_op>
      (s + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c), promote (0, *s), c); }

  //! Whether column j consists entirely of zeros.
  TMPL static bool
  col_is_zero (const C* s, nat j, nat r, nat c) {
    if (r == 0 || c == 0) return true;
    return Vec::template vec_binary_test_scalar_stride<equal_op>
      (s + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), promote (0, *s), r); }

};
00324
00325
00326
00327
00328
//! Per-variant multiplication threshold trait (empty for the naive
//! variant); the tags below select the base and public product layers.
template<typename V>
struct matrix_multiply_threshold {};

struct matrix_multiply_base {};
struct matrix_multiply {};
00334
template<typename V>
struct implementation<matrix_multiply_base,V,matrix_naive>:
  public implementation<matrix_linear,V>
{
  typedef implementation<matrix_linear,V> Mat;

  //! Decides whether mul must clear its destination when the inner
  //! dimension l is zero: a plain product (generic Op) zeroes the
  //! destination block; an accumulating product (mul_add_op,
  //! specialization below) leaves it untouched.
  template<typename Op, typename C>
  struct clear_helper {
    static inline void op (C* d, nat r, nat rr, nat c, nat cc) {
      Mat::clear_range (d, 0, 0, r, c, rr, cc); }
  };

  template<typename C>
  struct clear_helper<mul_add_op,C> {
    static inline void op (C* d, nat r, nat rr, nat c, nat cc) {}
  };

  template<typename Op, typename C> static inline void
  clr (C* d, nat r, nat rr, nat c, nat cc) {
    clear_helper<Op,C>::op (d, r, rr, c, cc);
  }

  //! Naive triple-loop product d (r x c) = s1 (r x l) * s2 (l x c),
  //! combined through Op.  rr, ll, cc are the allocated dimensions of
  //! the operands (used only to derive strides).
  template<typename Op, typename D, typename S1, typename S2>
  static inline void
  mul (D* d, const S1* s1, const S2* s2,
       nat r, nat rr, nat l, nat ll, nat c, nat cc) {
    typedef typename Op::acc_op Acc;
    if (l == 0) clr<Op> (d, r, rr, c, cc);
    else {
      nat ri, ci, li;
      D *dr, *dc;
      const S1 *s1r, *s1c;
      const S2 *s2r, *s2c;
      // Row / column strides for the three operands.
      nat drp = Mat::index (1, 0, rr, cc), dcp = Mat::index (0, 1, rr, cc);
      nat s1rp= Mat::index (1, 0, rr, ll), s1cp= Mat::index (0, 1, rr, ll);
      nat s2rp= Mat::index (1, 0, ll, cc), s2cp= Mat::index (0, 1, ll, cc);
      for (ri= r, dr= d, s1r= s1; ri!=0; ri--, dr += drp, s1r += s1rp)
        for (ci= c, dc= dr, s2c= s2; ci!=0; ci--, dc += dcp, s2c += s2cp) {
          // tmp holds the current destination value; for a plain
          // product Op::set_op overwrites it on the first inner
          // iteration, for an accumulating Op it is folded in.
          D tmp= *dc;
          // The first loop runs exactly once (li == l holds only on
          // entry) and initializes with Op; the second loop then
          // accumulates the remaining l-1 terms with Op's acc_op.
          for (li= l, s1c= s1r, s2r= s2c; li==l; li--, s1c += s1cp, s2r += s2rp)
            Op ::set_op (tmp, *s1c, *s2r);
          for ( ; li!=0; li--, s1c += s1cp, s2r += s2rp)
            Acc::set_op (tmp, *s1c, *s2r);
          *dc= tmp;
        }
    }
  }

};
00384
00385 template<typename V>
00386 struct implementation<matrix_multiply,V,matrix_naive>:
00387 public implementation<matrix_multiply_base,V>
00388 {
00389 typedef implementation<matrix_multiply_base,V> Mat;
00390
00391 template<typename Op, typename D, typename S1, typename S2>
00392 static inline void
00393 mul (D* d, const S1* s1, const S2* s2,
00394 nat r, nat rr, nat l, nat ll, nat c, nat cc) {
00395 Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00396 }
00397
00398 template<typename D, typename S1, typename S2> static inline void
00399 mul (D* dest, const S1* m1, const S2* m2, nat r, nat l, nat c) {
00400 Mat::template mul<mul_op> (dest, m1, m2, r, r, l, l, c, c);
00401 }
00402
00403 };
00404
00405
00406
00407
00408
00409 struct matrix_iterate {};
00410
00411 template<typename V>
00412 struct implementation<matrix_iterate,V,matrix_naive>:
00413 public implementation<matrix_multiply,V>
00414 {
00415 typedef implementation<matrix_multiply,V> Mat;
00416
00417 TMPL static inline void
00418 iterate_mul (C** d, const C* s, const C* x,
00419 nat rs, nat cs, nat rx, nat cx, nat n) {
00420
00421
00422 VERIFY (cs == rx && rs == cs, "sizes do not match");
00423 if (rs == 0) return;
00424 Mat::get_range (d[0], x, 0, 0, rx, cx, rx, cx);
00425 for (nat i = 1; i < n; i++)
00426 Mat::mul (d[i], s, d[i-1], rs, cs, cx);
00427 }
00428
00429 TMPL static inline void
00430 project_iterate_mul (C** d, const C* y, const C* s, const C* x,
00431 nat ry, nat cy, nat rs, nat cs, nat rx, nat cx, nat n) {
00432
00433 VERIFY (cy == rs && rs == cs && cs == rx, "sizes do not match");
00434 if (n == 0) return;
00435 nat l= aligned_size<C,V> (rx * cx);
00436 C* buf= mmx_new<C> (l << 1);
00437 C* t1= buf, * t2= t1 + l;
00438 Mat::get_range (t1, x, 0, 0, rx, cx, rx, cx);
00439 Mat::mul (d[0], y, t1, ry, cy, cx);
00440 for (nat i = 1; i < n; i++) {
00441 Mat::mul (t2, s, t1, rs, cs, cx);
00442 Mat::mul (d[i], y, t2, ry, cy, cx);
00443 swap (t1, t2);
00444 }
00445 mmx_delete<C> (buf, l << 1);
00446 }
00447
00448 };
00449
00450
00451
00452
00453
00454 struct matrix_permute {};
00455
00456 template<typename V>
00457 struct implementation<matrix_permute,V,matrix_naive>:
00458 public implementation<matrix_multiply,V>
00459 {
00460 typedef implementation<matrix_multiply,V> Mat;
00461 typedef implementation<vector_linear,V> Vec;
00462
00463 TMPL static void
00464 col_permute (C* dest, const C* m, const nat* p, nat r, nat c) {
00465
00466 nat rs= Mat::index (1, 0, r, c);
00467 for (nat i=0; i<c; i++)
00468 Vec::template vec_unary_stride<id_op>
00469 (dest + Mat::index (0, i, r, c), rs,
00470 m + Mat::index (0, p[i], r, c), rs, r);
00471 }
00472
00473 TMPL static void
00474 row_permute (C* dest, const C* m, const nat* p, nat r, nat c) {
00475
00476 nat cs= Mat::index (0, 1, r, c);
00477 for (nat i=0; i<r; i++)
00478 Vec::template vec_unary_stride<id_op>
00479 (dest + Mat::index (i, 0, r, c), cs,
00480 m + Mat::index (p[i], 0, r, c), cs, c);
00481 }
00482
00483 TMPL static void
00484 col_permute (C* m, const nat* p, nat r, nat c, bool inv= false) {
00485
00486
00487 nat q[c];
00488 if (inv) for (nat i=0; i<c; i++) q[i]= p[i];
00489 else for (nat i=0; i<c; i++) q[p[i]]= i;
00490 for (nat i=0; i<c; ) {
00491 if (q[i] == i) i++;
00492 else {
00493 nat j= q[i], k= q[j];
00494 Mat::col_swap (m, i, j, r, c);
00495 q[i]= k; q[j]= j;
00496 }
00497 }
00498 }
00499
00500 TMPL static void
00501 row_permute (C* m, const nat* p, nat r, nat c, bool inv= false) {
00502
00503
00504 nat q[r];
00505 if (inv) for (nat i=0; i<c; i++) q[i]= p[i];
00506 else for (nat i=0; i<c; i++) q[p[i]]= i;
00507 for (nat i=0; i<r; ) {
00508 if (q[i] == i) i++;
00509 else {
00510 nat j= q[i], k= q[j];
00511 Mat::row_swap (m, i, q[i], r, c);
00512 q[i]= k; q[j]= j;
00513 }
00514 }
00515 }
00516
00517 };
00518
00519
00520
00521
00522
//! Trait governing pivot selection during echelon computations.  The
//! generic fallback never prefers a new candidate over the current
//! one, so the first non-zero entry encountered wins; specialize for
//! types where another choice (e.g. largest magnitude) is preferable.
template<typename C>
struct pivot_helper {
  static inline bool
  better (const C& cand, const C& cur) {
    (void) cand; (void) cur;
    return false;
  }
};

//! Convenience wrapper: should cand replace cur as the pivot?
template<typename C> inline bool
better_pivot (const C& cand, const C& cur) {
  return pivot_helper<C>::better (cand, cur);
}
00533
00534
00535
00536
00537
//! Tag for column echelon form computations.
struct matrix_echelon {};

template<typename V>
struct implementation<matrix_echelon,V,matrix_naive>:
  public implementation<matrix_multiply,V>
{
  typedef implementation<matrix_multiply,V> Mat;

private:

  //! Puts the rows [ri, rf) x columns [ci, cf) part of the rm x cm
  //! matrix m into column echelon form, in place.
  //! @param m       rm x cm matrix, modified in place.
  //! @param k       if non-NULL, a cm x cm matrix (initialized to the
  //!                identity by the public wrappers) on which every
  //!                column operation is replayed, so that on exit it
  //!                records the accumulated column transform —
  //!                presumably m_out = m_in * k; confirm against callers.
  //! @param num,den accumulate the product of the pivots (num) and the
  //!                sign of the column permutation (den flips on each
  //!                swap); used by det () as num / den.
  //! @param reduced if true, also eliminates to the left of each pivot
  //!                (columns from cb onwards), giving a reduced form.
  //! @param permut  if non-NULL, array of cm entries recording the
  //!                column permutation that was applied.
  TMPL static void
  col_echelon (C* m, C* k, nat ri, nat ci, nat rf, nat cf, nat rm, nat cm,
               C& num, C& den, bool reduced, nat* permut)
  {
    VERIFY ((ri < rm) && (ci < cm) && (rf <= rm) && (cf <= cm), "out of range");
    format<C> fm= get_format (m[0]);
    num= promote (1, fm); den= promote (1, fm);
    nat cb= ci;
    if (permut != NULL) for (nat i= 0; i < cm; i++) permut[i]= i;
    while (ri < rf && ci < cf) {
      // Pivot search: scan rows from ri downwards; within the first
      // row that contains a non-zero entry, keep the "best" candidate
      // according to better_pivot (first non-zero wins by default).
      nat i= ri, j= ci;
      nat best_index;
      C best_val= promote (0, fm);
      for (i= ri; i<rf; i++) {
        best_index= cf;                    // sentinel: none found in this row
        for (j= ci; j<cf; j++) {
          C next_val= Mat::entry (m, i, j, rm, cm);
          if (next_val != promote (0, fm))
            if (best_index == cf || better_pivot (next_val, best_val)) {
              best_index= j;
              best_val= next_val;
            }
        }
        if (best_index != cf) { j= best_index; break; }
      }

      if (i<rf && j<cf) {
        // Bring the pivot column into position ci.
        if (j != ci) {
          if (k != NULL) Mat::col_swap (k, ci, j, cm, cm);
          Mat::col_swap (m, ci, j, rm, cm);
          den= -den;                       // a column swap flips the sign
          if (permut != NULL) swap (permut[j], permut[ci]);
        }
        ri= i;
        // Eliminate the pivot row from the other columns:
        // column index -= (t / p) * column ci.
        C p= Mat::entry (m, ri, ci, rm, cm);
        num *= p;
        for (nat index= reduced ? cb : ci + 1; index < cf; index++) {
          if (index == ci) continue;
          C t= Mat::entry (m, ri, index, rm, cm);
          if (k != NULL) Mat::col_sweep (k, index, ci, t/p, cm, cm);
          Mat::col_sweep (m, index, ci, t/p, rm, cm);
        }
        ri++; ci++;
      }
      else break;                          // no pivot left: done
    }
  }

public:

  //! Column echelon form of the whole rm x cm matrix m; k (if
  //! non-NULL) is set to the identity first and accumulates the
  //! column operations; num / den is the determinant contribution.
  TMPL static inline void
  col_echelon (C* m, C* k, nat rm, nat cm, C& num, C& den,
               bool reduced= false, nat* permut= NULL) {
    if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm);
    col_echelon (m, k, 0, 0, rm, cm, rm, cm, num, den, reduced, permut);
  }

  //! Variant discarding the pivot product and sign.
  TMPL static inline void
  col_echelon (C* m, C* k, nat rm, nat cm,
               bool reduced= false, nat* permut= NULL) {
    if (k != NULL && cm != 0) Mat::set (k, promote (1, k[0]), cm, cm);
    C num, den;
    col_echelon (m, k, rm, cm, num, den, reduced, permut);
  }

};
00627
00628
00629
00630
00631
//! Tag for determinant computation.
struct matrix_determinant {};

template<typename V>
struct implementation<matrix_determinant,V,matrix_naive>:
  public implementation<matrix_echelon,V>
{
  typedef implementation<matrix_echelon,V> Mat;

  //! Sets r to the determinant of the square n x n matrix m.  Works on
  //! a scratch copy; m itself is left untouched.
  TMPL static void
  det (C& r, const C* m, nat n) {
    if (n == 0) { set_as (r, 1); return; }  // determinant of the empty matrix
    nat len_c= aligned_size<C,V> (n * n);
    C* c= mmx_new<C> (len_c), * k= NULL;
    C num, den;
    Mat::copy (c, m, n, n);
    Mat::col_echelon (c, k, n, n, num, den);
    // A zero last column after echelon means m is singular; otherwise
    // the determinant is the pivot product over the permutation sign.
    if (Mat::col_is_zero (c, n-1, n, n)) set_as (r, 0);
    else r= num / den;
    mmx_delete<C> (c, len_c);
  }

};
00654
00655
00656
00657
00658
//! Tag for kernel (null space) computation.
struct matrix_kernel {};

template<typename V>
struct implementation<matrix_kernel,V,matrix_naive>:
  public implementation<matrix_echelon,V>
{
  typedef implementation<matrix_echelon,V> Mat;

  //! Computes a basis of the kernel of the rm x cm matrix m.  k must
  //! be a cm x cm matrix; on exit its first dim columns (dim being the
  //! return value) span the kernel of m.
  TMPL static nat
  kernel (C* k, const C* m, nat rm, nat cm) {
    VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
    nat i, j, dim, len_c= aligned_size<C,V> (rm * cm);
    C* c= mmx_new<C> (len_c);
    Mat::copy (c, m, rm, cm);
    // Echelon with accumulated column transform k: the columns of k
    // matching zero columns of the echelon form lie in the kernel.
    Mat::col_echelon (c, k, rm, cm);
    for (i=0; i<cm; i++)
      if (Mat::col_is_zero (c, i, rm, cm)) break;
    mmx_delete<C> (c, len_c);
    dim= cm - i;
    // Move the kernel columns to the front of k.
    if (dim < cm)
      for (j=0; j < dim; j++)
        Mat::col_swap (k, j, i+j, cm, cm);
    // Normalization pass.
    // NOTE(review): column j is divided by *every* non-zero entry it
    // meets (there is no break after the first one), so later
    // divisions act on the already rescaled column — verify this is
    // the intended normalization.
    C x;
    for (j=0; j < dim; j++)
      for (i=0; i < cm; i++) {
        x= *(k + Mat::index (i, j, cm, cm));
        if (x != promote (0, x))
          Mat::col_div (k, x, j, cm, cm);
      }
    return dim;
  }

};
00694
00695
00696
00697
00698
00699 struct matrix_image {};
00700
00701 template<typename V>
00702 struct implementation<matrix_image,V,matrix_naive>:
00703 public implementation<matrix_echelon,V>
00704 {
00705 typedef implementation<matrix_echelon,V> Mat;
00706
00707 TMPL static nat
00708 image (C* k, const C* m, nat rm, nat cm) {
00709
00710 VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00711 nat i, len_c= aligned_size<C,V> (rm * cm);
00712 C* c= mmx_new<C> (len_c);
00713 Mat::copy (c, m, rm, cm);
00714 Mat::col_echelon (c, (C*) NULL, rm, cm);
00715 for (i=0; i<cm; i++)
00716 if (Mat::col_is_zero (c, i, rm, cm)) break;
00717 Mat::get_range (k, c, 0, 0, rm, i, rm, cm);
00718
00719 mmx_delete<C> (c, len_c);
00720 return i;
00721 }
00722
00723 TMPL static nat
00724 rank (const C* m, nat rm, nat cm) {
00725 VERIFY (rm > 0 && cm > 0, "unexpected empty matrix");
00726 nat i, len_c= aligned_size<C,V> (rm * cm);
00727 C* c= mmx_new<C> (len_c);
00728 Mat::copy (c, m, rm, cm);
00729 Mat::col_echelon (c, (C*) NULL, rm, cm);
00730 for (i=0; i<cm; i++)
00731 if (Mat::col_is_zero (c, i, rm, cm)) break;
00732 mmx_delete<C> (c, len_c);
00733 return i;
00734 }
00735
00736 };
00737
00738
00739
00740
00741
//! Tag for matrix inversion.
struct matrix_invert {};

template<typename V>
struct implementation<matrix_invert,V,matrix_naive>:
  public implementation<matrix_echelon,V>
{
  typedef implementation<matrix_echelon,V> Mat;

  //! Writes into inv the inverse of the lower triangular n x n matrix
  //! m, by forward substitution, row by row.
  TMPL static void
  invert_lower_triangular (C* inv, const C* m, nat n) {
    for (nat i=0; i<n; i++) {
      C d= Mat::entry (m, i, i, n, n);
      ASSERT (d != promote (0, d), "non-invertible matrix");
      for (nat k=0; k<=i; k++) {
        // inv(i,k)= (delta(i,k) - sum_{j=k}^{i-1} m(i,j) inv(j,k)) / m(i,i)
        C sum= promote (0, d);
        for (nat j=k; j<i; j++)
          sum -= Mat::entry (m, i, j, n, n) * Mat::entry (inv, j, k, n, n);
        if (i == k) sum += promote (1, d);
        inv[Mat::index (i, k, n, n)]= sum / d;
      }
      // The inverse of a lower triangular matrix is lower triangular.
      for (nat k=i+1; k<n; k++)
        inv[Mat::index (i, k, n, n)]= promote (0, d);
    }
  }

  //! Writes into k the inverse of the n x n matrix m.  Column echelon
  //! reduces m to a triangular form t while b accumulates the column
  //! operations; m^-1 is then recovered as b * t^-1 (the ASSERT in
  //! invert_lower_triangular fires if m is singular).
  TMPL static void
  invert (C* k, const C* m, nat n) {
    nat l= aligned_size<C,V> (n * n);
    C* a= mmx_new<C> (l << 1);
    C* b= a + l;
    Mat::copy (k, m, n, n);
    Mat::col_echelon (k, b, n, n);       // k becomes triangular, b the transform
    invert_lower_triangular (a, k, n);   // a= k^-1
    Mat::mul (k, b, a, n, n, n);         // k= b * a = m^-1
    mmx_delete<C> (a, l << 1);
  }

};
00783
00784
00785
00786
00787
//! Tag for Gram-Schmidt orthogonalization routines.
struct matrix_orthogonalization {};

template<typename V>
struct implementation<matrix_orthogonalization,V,matrix_naive>:
  public implementation<matrix_multiply,V>
{
  typedef implementation<matrix_linear,V> Mat;
  typedef implementation<vector_linear,V> Vec;

  //! Inner product of rows i and j of the r x c matrix m.
  TMPL static inline C
  row_inn_prod (C* m, nat i, nat j, nat r, nat c) {
    return Vec::template vec_binary_big_stride<mul_add_op> (
      m + Mat::index (i, 0, r, c), Mat::index (0, 1, r, c),
      m + Mat::index (j, 0, r, c), Mat::index (0, 1, r, c), c);
  }

  //! Inner product of columns i and j of the r x c matrix m.
  TMPL static inline C
  col_inn_prod (C* m, nat i, nat j, nat r, nat c) {
    return Vec::template vec_binary_big_stride<mul_add_op> (
      m + Mat::index (0, i, r, c), Mat::index (1, 0, r, c),
      m + Mat::index (0, j, r, c), Mat::index (1, 0, r, c), r);
  }

  //! In-place Gram-Schmidt on the rows of m; n (length r) receives the
  //! squared norms of the orthogonalized rows.  Rows with zero norm
  //! are skipped as projection targets.
  TMPL static void
  row_orthogonalize (C* m, nat r, nat c, C* n) {
    for (nat i= 0; i < r; i++) {
      for (nat j= 0; j < i; j++) {
        if (n[j] == 0) continue;
        C mu= row_inn_prod (m, i, j, r, c) / n[j];
        Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
      }
      n[i]= row_inn_prod (m, i, i, r, c);
    }
  }

  //! Column version of row_orthogonalize; n has length c.
  TMPL static void
  col_orthogonalize (C* m, nat r, nat c, C* n) {
    for (nat i= 0; i < c; i++) {
      for (nat j= 0; j < i; j++) {
        if (n[j] == 0) continue;
        C mu= col_inn_prod (m, i, j, r, c) / n[j];
        Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
      }
      n[i]= col_inn_prod (m, i, i, r, c);
    }
  }

  //! As row_orthogonalize, additionally storing the projection
  //! coefficients mu in the r x r matrix l, with ones on the diagonal.
  TMPL static void
  row_orthogonalize (C* m, nat r, nat c, C* l, C* n) {
    for (nat i= 0; i < r; i++) {
      for (nat j= 0; j < i; j++) {
        if (n[j] == 0) continue;
        C mu= row_inn_prod (m, i, j, r, c) / n[j];
        Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
        *(l + Mat::index (i, j, r, r))= mu;
      }
      n[i]= row_inn_prod (m, i, i, r, c);
      *(l + Mat::index (i, i, r, r))= C(1);
    }
  }

  //! As col_orthogonalize, storing the coefficients in the c x c
  //! matrix l (transposed placement relative to the row version).
  TMPL static void
  col_orthogonalize (C* m, nat r, nat c, C* l, C* n) {
    for (nat i= 0; i < c; i++) {
      for (nat j= 0; j < i; j++) {
        if (n[j] == 0) continue;
        C mu= col_inn_prod (m, i, j, r, c) / n[j];
        Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
        *(l + Mat::index (j, i, c, c))= mu;
      }
      n[i]= col_inn_prod (m, i, i, r, c);
      *(l + Mat::index (i, i, c, c))= C(1);
    }
  }

  //! In-place Gram-Schmidt with normalization of the rows (requires
  //! sqrt on C); rows with zero norm are left unnormalized.  Unlike
  //! row_orthogonalize, the projection uses the raw inner product —
  //! earlier rows have already been normalized at this point.
  TMPL static void
  row_orthonormalize (C* m, nat r, nat c) {
    for (nat i= 0; i < r; i++) {
      for (nat j= 0; j < i; j++)
        Mat::row_combine_sub (m, i, C(1), j, row_inn_prod (m, i, j, r, c), r, c);
      C a= row_inn_prod (m, i, i, r, c);
      if (a != 0) Mat::row_div (m, sqrt (a), i, r, c);
    }
  }

  //! Column version of row_orthonormalize.
  TMPL static void
  col_orthonormalize (C* m, nat r, nat c) {
    for (nat i= 0; i < c; i++) {
      for (nat j= 0; j < i; j++)
        Mat::col_combine_sub (m, i, C(1), j, col_inn_prod (m, i, j, r, c), r, c);
      C a= col_inn_prod (m, i, i, r, c);
      if (a != 0) Mat::col_div (m, sqrt (a), i, r, c);
    }
  }

  //! As row_orthonormalize, storing the projection coefficients and
  //! the norms on the diagonal of the r x r matrix l.
  TMPL static void
  row_orthonormalize (C* m, nat r, nat c, C* l) {
    for (nat i= 0; i < r; i++) {
      for (nat j= 0; j < i; j++) {
        C mu= row_inn_prod (m, i, j, r, c);
        Mat::row_combine_sub (m, i, C(1), j, mu, r, c);
        *(l + Mat::index (i, j, r, r))= mu;
      }
      C a= row_inn_prod (m, i, i, r, c);
      if (a != 0) {
        C b= sqrt (a);
        Mat::row_div (m, b, i, r, c);
        *(l + Mat::index (i, i, r, r))= b;
      }
    }
  }

  //! Column version of the previous routine, with l of size c x c.
  TMPL static void
  col_orthonormalize (C* m, nat r, nat c, C* l) {
    for (nat i= 0; i < c; i++) {
      for (nat j= 0; j < i; j++) {
        C mu= col_inn_prod (m, i, j, r, c);
        Mat::col_combine_sub (m, i, C(1), j, mu, r, c);
        *(l + Mat::index (j, i, c, c))= mu;
      }
      C a= col_inn_prod (m, i, i, r, c);
      if (a != 0) {
        C b= sqrt (a);
        Mat::col_div (m, b, i, r, c);
        *(l + Mat::index (i, i, c, c))= b;
      }
    }
  }

};
00935
00936 #undef TMPL
00937 }
00938 #endif //__MMX__MATRIX_NAIVE__HPP