00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef __MMX__MATRIX_ALIGNED__HPP
00014 #define __MMX__MATRIX_ALIGNED__HPP
00015 #include <basix/int.hpp>
00016 #include <numerix/simd.hpp>
00017 #include <algebramix/vector_aligned.hpp>
00018
00019 namespace mmx {
00020
00021
00022
00023
00024
00025 template<typename V, typename W>
00026 struct matrix_assume_aligned: public V {
00027
00028
00029
00030
00031 typedef vector_assume_aligned<typename V::Vec,
00032 typename W::Vec> Vec;
00033 typedef matrix_assume_aligned<typename V::Naive,
00034 typename W::Naive> Naive;
00035 typedef matrix_assume_aligned<typename V::Positive,
00036 typename W::Positive> Positive;
00037 typedef typename V::No_aligned No_aligned;
00038 typedef matrix_assume_aligned<typename V::No_thread,
00039 typename W::No_thread> No_thread;
00040 typedef matrix_assume_aligned<typename V::No_scaled,
00041 typename W::No_scaled> No_scaled;
00042 };
00043
00044 template<typename F, typename Z, typename V, typename W>
00045 struct implementation<F,Z,matrix_assume_aligned<V,W> >:
00046 public implementation<F,Z,V> {};
00047
00048 template<typename Z, typename V, typename W>
00049 struct implementation<vector_allocate,Z,matrix_assume_aligned<V,W> >:
00050 public implementation<vector_allocate,
00051 typename matrix_assume_aligned<V,W>::Vec> {};
00052
00053 template<typename V, typename W>
00054 struct matrix_aligned: public V {
00055
00056 typedef vector_aligned<typename V::Vec,
00057 typename W::Vec> Vec;
00058 typedef matrix_aligned<typename V::Naive,
00059 typename W::Naive> Naive;
00060 typedef matrix_aligned<typename V::Positive,
00061 typename W::Positive> Positive;
00062 typedef typename V::No_aligned No_aligned;
00063 typedef matrix_aligned<typename V::No_thread,
00064 typename W::No_thread> No_thread;
00065 typedef matrix_aligned<typename V::No_scaled,
00066 typename W::No_scaled> No_scaled;
00067 };
00068
00069 template<typename F, typename Z, typename V, typename W>
00070 struct implementation<F,Z,matrix_aligned<V,W> >:
00071 public implementation<F,Z,V> {};
00072
00073 template<typename Z, typename V, typename W>
00074 struct implementation<vector_allocate,Z,matrix_aligned<V,W> >:
00075 public implementation<vector_allocate,
00076 typename matrix_aligned<V,W>::Vec> {};
00077
00078
00079
00080
00081
00082 template<typename V, typename W,
00083 typename Op, typename D, typename S1, typename S2>
00084 struct mat_mul_aligned_helper {
00085 static inline void
00086 mul (D* d, const S1* s1, const S2* s2,
00087 nat r, nat rr, nat l, nat ll, nat c, nat cc)
00088 {
00089 typedef implementation<matrix_multiply,V> Mat;
00090 Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00091 }
00092 };
00093
00094
00095
00096
00097
00098 template<typename Z, typename V, typename W>
00099 struct implementation<matrix_multiply_base,Z,matrix_assume_aligned<V,W> >:
00100 public implementation<matrix_linear,Z>
00101 {
00102 template<typename Op, typename D, typename S1, typename S2>
00103 static inline void
00104 mul (D* d, const S1* s1, const S2* s2,
00105 nat r, nat rr, nat l, nat ll, nat c, nat cc)
00106 {
00107 mat_mul_aligned_helper<V,W,Op,D,S1,S2>::
00108 mul (d, s1, s2, r, rr, l, ll, c, cc);
00109 }
00110 };
00111
00112
00113
00114
00115
00116 template<typename Z, typename V, typename W>
00117 struct implementation<matrix_multiply_base,Z,matrix_aligned<V,W> >:
00118 public implementation<matrix_linear,Z>
00119 {
00120 typedef implementation<matrix_multiply,V> Mat;
00121 typedef implementation<matrix_multiply,matrix_assume_aligned<V,W> > AMat;
00122 typedef implementation<vector_allocate,
00123 typename matrix_aligned<V,W>::Vec> Vec;
00124
00125 template<typename Op, typename D, typename S1, typename S2>
00126 static inline void
00127 mul (D* d, const S1* s1, const S2* s2,
00128 nat r, nat rr, nat l, nat ll, nat c, nat cc)
00129 {
00130 if (r == 0) return;
00131 if (mask_helper<D>::len != 16 || mask_helper<S1>::len != 16) {
00132 Mat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, cc);
00133 return;
00134 }
00135 nat ar= Vec::template vec_floor_aligned_size<D> (r);
00136 nat sr= r - ar;
00137 nat arr= Vec::template vec_floor_aligned_size<D> (rr);
00138 if (sr != 0) {
00139 if (ar != 0) mul<Op> (d, s1, s2, ar, rr, l, ll, c, cc);
00140 Mat::template mul<Op> (d + Mat::index (ar, 0, rr, cc),
00141 s1 + Mat::index (ar, 0, rr, ll), s2,
00142 sr, rr, l, ll, c, cc);
00143
00144 }
00145 else if (! Vec::template vec_is_aligned (d) ||
00146 ! Vec::template vec_is_aligned (s1) ||
00147 arr != rr) {
00148 D* xd = mmx_new<D> (r * c);
00149 S1* xs1= mmx_new<S1> (r * l);
00150 S2* xs2= mmx_new<S2> (l * c);
00151 Mat::template mat_unary_stride<id_op>
00152 (xs1, Mat::index (1, 0, r , l ), Mat::index (0, 1, r , l ),
00153 s1 , Mat::index (1, 0, rr, ll), Mat::index (0, 1, rr, ll), r, l);
00154 Mat::template mat_unary_stride<id_op>
00155 (xs2, Mat::index (1, 0, l , c ), Mat::index (0, 1, l , c ),
00156 s2 , Mat::index (1, 0, ll, cc), Mat::index (0, 1, ll, cc), l, c);
00157 AMat::template mul<Op> (xd, xs1, xs2, r, r, l, l, c, c);
00158 Mat::template mat_unary_stride<typename Op::nomul_op>
00159 (d , Mat::index (1, 0, rr, cc), Mat::index (0, 1, rr, cc),
00160 xd, Mat::index (1, 0, r , c ), Mat::index (0, 1, r , c ), r, c);
00161 mmx_delete<D> (xd, r * c);
00162 mmx_delete<S1> (xs1, r * l);
00163 mmx_delete<S2> (xs2, l * c);
00164 }
00165 else
00166 AMat::template mul<Op> (d, s1, s2, r, rr, l, ll, c, c);
00167 }
00168
00169 };
00170
00171 }
00172 #endif // __MMX__MATRIX_ALIGNED__HPP