00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 #ifndef __MMX__MATRIX_ALIGNED__HPP
00014 #define __MMX__MATRIX_ALIGNED__HPP
00015 #include <basix/int.hpp>
00016 #include <numerix/simd.hpp>
00017 #include <algebramix/vector_aligned.hpp>
00018 
00019 namespace mmx {
00020 
00021 
00022 
00023 
00024 
00025 template<typename V, typename W>
00026 struct matrix_assume_aligned: public V {
00027   
00028   
00029   
00030  
00031   typedef vector_assume_aligned<typename V::Vec,
00032                                 typename W::Vec> Vec;
00033   typedef matrix_assume_aligned<typename V::Naive,
00034                                 typename W::Naive> Naive;
00035   typedef matrix_assume_aligned<typename V::Positive,
00036                                 typename W::Positive> Positive;
00037   typedef typename V::No_aligned No_aligned;
00038   typedef matrix_assume_aligned<typename V::No_thread,
00039                                 typename W::No_thread> No_thread;
00040   typedef matrix_assume_aligned<typename V::No_scaled,
00041                                 typename W::No_scaled> No_scaled;
00042 };
00043 
00044 template<typename F, typename Z, typename V, typename W>
00045 struct implementation<F,Z,matrix_assume_aligned<V,W> >:
00046     public implementation<F,Z,V> {};
00047 
00048 template<typename Z, typename V, typename W>
00049 struct implementation<vector_allocate,Z,matrix_assume_aligned<V,W> >:
00050   public implementation<vector_allocate,
00051                         typename matrix_assume_aligned<V,W>::Vec> {};
00052 
00053 template<typename V, typename W>
00054 struct matrix_aligned: public V {
00055   
00056   typedef vector_aligned<typename V::Vec,
00057                          typename W::Vec> Vec;
00058   typedef matrix_aligned<typename V::Naive,
00059                          typename W::Naive> Naive;
00060   typedef matrix_aligned<typename V::Positive,
00061                          typename W::Positive> Positive;
00062   typedef typename V::No_aligned No_aligned;
00063   typedef matrix_aligned<typename V::No_thread,
00064                          typename W::No_thread> No_thread;
00065   typedef matrix_aligned<typename V::No_scaled,
00066                          typename W::No_scaled> No_scaled;
00067 };
00068 
00069 template<typename F, typename Z, typename V, typename W>
00070 struct implementation<F,Z,matrix_aligned<V,W> >:
00071     public implementation<F,Z,V> {};
00072 
00073 template<typename Z, typename V, typename W>
00074 struct implementation<vector_allocate,Z,matrix_aligned<V,W> >:
00075   public implementation<vector_allocate,
00076                         typename matrix_aligned<V,W>::Vec> {};
00077 
00078 
00079 
00080 
00081 
00082 template<typename V, typename W,
00083          typename Op, typename D, typename S1, typename S2>
00084 struct mat_mul_aligned_helper {
00085   static inline void
00086   mul (D* d, const S1* s1, const S2* s2,
00087        nat r, nat rr, nat l, nat ll, nat c, nat cc)
00088   {
00089     typedef implementation<matrix_multiply,V> Mat;
00090     Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00091   }
00092 };
00093 
00094 
00095 
00096 
00097 
00098 template<typename Z, typename V, typename W>
00099 struct implementation<matrix_multiply_base,Z,matrix_assume_aligned<V,W> >:
00100   public implementation<matrix_linear,Z>
00101 {
00102   template<typename Op, typename D, typename S1, typename S2>
00103   static inline void
00104   mul (D* d, const S1* s1, const S2* s2,
00105        nat r, nat rr, nat l, nat ll, nat c, nat cc)
00106   {
00107     mat_mul_aligned_helper<V,W,Op,D,S1,S2>::
00108       mul (d, s1, s2, r, rr, l, ll, c, cc);
00109   }
00110 }; 
00111 
00112 
00113 
00114 
00115 
00116 template<typename Z, typename V, typename W>
00117 struct implementation<matrix_multiply_base,Z,matrix_aligned<V,W> >:
00118   public implementation<matrix_linear,Z>
00119 {
00120   typedef implementation<matrix_multiply,V> Mat;
00121   typedef implementation<matrix_multiply,matrix_assume_aligned<V,W> > AMat;
00122   typedef implementation<vector_allocate,
00123                          typename matrix_aligned<V,W>::Vec> Vec;
00124 
00125   template<typename Op, typename D, typename S1, typename S2>
00126   static inline void
00127   mul (D* d, const S1* s1, const S2* s2,
00128        nat r, nat rr, nat l, nat ll, nat c, nat cc)
00129   {
00130     if (r == 0) return;
00131     if (mask_helper<D>::len != 16 || mask_helper<S1>::len != 16) {
00132       Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00133       return;
00134     }
00135     nat ar= Vec::template vec_floor_aligned_size<D> (r);
00136     nat sr= r - ar;
00137     nat arr= Vec::template vec_floor_aligned_size<D> (rr);    
00138     if (sr != 0) {
00139       if (ar != 0) mul<Op> (d, s1, s2, ar, rr, l, ll, c, cc);
00140       Mat::template mul<Op> (d  + Mat::index (ar, 0, rr, cc),
00141                              s1 + Mat::index (ar, 0, rr, ll), s2,
00142                              sr, rr, l, ll, c, cc);
00143       
00144     }
00145     else if (! Vec::template vec_is_aligned (d) ||
00146              ! Vec::template vec_is_aligned (s1) ||
00147              arr != rr) {
00148       D* xd = mmx_new<D> (r * c);
00149       S1* xs1= mmx_new<S1> (r * l);
00150       S2* xs2= mmx_new<S2> (l * c);
00151       Mat::template mat_unary_stride<id_op>
00152         (xs1, Mat::index (1, 0, r , l ), Mat::index (0, 1, r , l ),
00153          s1 , Mat::index (1, 0, rr, ll), Mat::index (0, 1, rr, ll), r, l);
00154       Mat::template mat_unary_stride<id_op>
00155         (xs2, Mat::index (1, 0, l , c ), Mat::index (0, 1, l , c ),
00156          s2 , Mat::index (1, 0, ll, cc), Mat::index (0, 1, ll, cc), l, c);
00157       AMat::template mul<Op> (xd, xs1, xs2, r, r, l, l, c, c);
00158       Mat::template mat_unary_stride<typename Op::nomul_op>
00159         (d , Mat::index (1, 0, rr, cc), Mat::index (0, 1, rr, cc),
00160          xd, Mat::index (1, 0, r , c ), Mat::index (0, 1, r , c ), r, c);
00161       mmx_delete<D> (xd, r * c);
00162       mmx_delete<S1> (xs1, r * l);
00163       mmx_delete<S2> (xs2, l * c);
00164     }
00165     else
00166       AMat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, c);
00167   }
00168   
00169 }; 
00170 
00171 } 
00172 #endif // __MMX__MATRIX_ALIGNED__HPP