00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef __MMX__VECTOR_ALIGNED__HPP
00014 #define __MMX__VECTOR_ALIGNED__HPP
00015 #include <basix/int.hpp>
00016 #include <numerix/simd.hpp>
00017
00018 namespace mmx {
00019
00020
00021
00022
00023
// Variant tag built on top of the variant V.  The vector_abstractions
// specialization for this tag (further down in this file) VERIFYs that
// addresses and sizes are aligned instead of correcting them.
// Every derived variant (Naive, Aligned, No_simd, No_thread) is obtained by
// taking the same-named variant of both V and W and wrapping the pair again.
// NOTE(review): W looks like the variant meant for SIMD specializations of the
// *_aligned_helper templates -- confirm against the code that specializes them.
template<typename V, typename W>
struct vector_assume_aligned: public V {
  typedef vector_assume_aligned<typename V::No_thread, typename W::No_thread>
    No_thread;
  typedef vector_assume_aligned<typename V::No_simd, typename W::No_simd>
    No_simd;
  typedef vector_assume_aligned<typename V::Aligned, typename W::Aligned>
    Aligned;
  typedef vector_assume_aligned<typename V::Naive, typename W::Naive>
    Naive;
};
00038
00039 template<typename F, typename Z, typename V, typename W>
00040 struct implementation<F,Z,vector_assume_aligned<V,W> >:
00041 public implementation<F,Z,V> {};
00042
00043 template<typename V, typename W>
00044 struct vector_aligned: public V {
00045
00046 typedef vector_aligned <typename V::Naive,
00047 typename W::Naive> Naive;
00048 typedef vector_assume_aligned<typename V::Aligned,
00049 typename W::Aligned> Aligned;
00050 typedef vector_aligned <typename V::No_simd,
00051 typename W::No_simd> No_simd;
00052 typedef vector_aligned <typename V::No_thread,
00053 typename W::No_thread> No_thread;
00054 };
00055
00056 template<typename F, typename Z, typename V, typename W>
00057 struct implementation<F,Z,vector_aligned<V,W> >:
00058 public implementation<F,Z,V> {};
00059
00060
00061
00062
00063
00064 template<typename V, typename W,
00065 typename Op, typename T>
00066 struct vec_nullary_aligned_helper {
00067 typedef implementation<vector_abstractions,V> Vec;
00068 static inline void op (T* dest, nat n) {
00069 Vec::template vec_nullary<Op,T> (dest, n); } };
00070
00071 template<typename V, typename W,
00072 typename Op, typename T, typename C>
00073 struct vec_unary_aligned_helper {
00074 typedef implementation<vector_abstractions,V> Vec;
00075 static inline void op (T* dest, const C* s, nat n) {
00076 Vec::template vec_unary<Op,T,C> (dest, s, n); } };
00077
00078 template<typename V, typename W,
00079 typename Op, typename T, typename C1, typename C2>
00080 struct vec_binary_aligned_helper {
00081 typedef implementation<vector_abstractions,V> Vec;
00082 static inline void op (T* dest, const C1* s1, const C2* s2, nat n) {
00083 Vec::template vec_binary<Op,T,C1,C2> (dest, s1, s2, n); } };
00084
00085 template<typename V, typename W,
00086 typename Op, typename T, typename X>
00087 struct vec_unary_scalar_aligned_helper {
00088 typedef implementation<vector_abstractions,V> Vec;
00089 static inline void op (T* dest, const X& x, nat n) {
00090 Vec::template vec_unary_scalar<Op,T,X> (dest, x, n); } };
00091
00092 template<typename V, typename W,
00093 typename Op, typename T, typename C, typename X>
00094 struct vec_binary_scalar_aligned_helper {
00095 typedef implementation<vector_abstractions,V> Vec;
00096 static inline void op (T* dest, const C* s, const X& x, nat n) {
00097 Vec::template vec_binary_scalar<Op,T,C,X> (dest, s, x, n); } };
00098
00099 template<typename V, typename W,
00100 typename Op, typename T, typename C1, typename C2, typename X>
00101 struct vec_ternary_scalar_aligned_helper {
00102 typedef implementation<vector_abstractions,V> Vec;
00103 static inline void op (T* dest, const C1* s1, const C2* s2,
00104 const X& x, nat n) {
00105 Vec::template vec_ternary_scalar<Op,T,C1,C2,X> (dest, s1, s2, x, n); } };
00106
00107 template<typename V, typename W,
00108 typename Op, typename T, typename C, typename X1, typename X2>
00109 struct vec_ternary_scalar_scalar_aligned_helper {
00110 typedef implementation<vector_abstractions,V> Vec;
00111 static inline void op (T* dest, const C* s,
00112 const X1& x1, const X2& x2, nat n) {
00113 Vec::template vec_ternary_scalar_scalar<Op,T,C,X1,X2>
00114 (dest, s, x1, x2, n); } };
00115
00116 template<typename V, typename W,
00117 typename Op, typename C>
00118 struct vec_unary_test_aligned_helper {
00119 typedef implementation<vector_abstractions,V> Vec;
00120 static inline bool op (const C* s, nat n) {
00121 return Vec::template vec_unary_test (s, n); } };
00122
00123 template<typename V, typename W,
00124 typename Op, typename C1, typename C2>
00125 struct vec_binary_test_aligned_helper {
00126 typedef implementation<vector_abstractions,V> Vec;
00127 static inline bool op (const C1* s1, const C2* s2, nat n) {
00128 return Vec::template vec_binary_test<Op,C1,C2> (s1, s2, n); } };
00129
00130 template<typename V, typename W,
00131 typename Op, typename C, typename X>
00132 struct vec_binary_test_scalar_aligned_helper {
00133 typedef implementation<vector_abstractions,V> Vec;
00134 static inline bool op (const C* s, const X& x, nat n) {
00135 return Vec::template vec_binary_test_scalar<Op,C,X> (s, x, n); } };
00136
00137 template<typename V, typename W,
00138 typename Op, typename C>
00139 struct vec_unary_big_aligned_helper {
00140 typedef implementation<vector_abstractions,V> Vec;
00141 static inline C op (const C* s, nat n) {
00142 return Vec::template vec_unary_big<Op,C> (s, n); }
00143 static inline C op (const C* s, nat n, const format<C>& fm) {
00144 return Vec::template vec_unary_big<Op,C> (s, n, fm); } };
00145
00146 template<typename V, typename W,
00147 typename Op, typename C1, typename C2>
00148 struct vec_binary_big_aligned_helper {
00149 typedef implementation<vector_abstractions,V> Vec;
00150 static inline C1 op (const C1* s1, const C2* s2, nat n) {
00151 return Vec::template vec_binary_big<Op,C1,C2> (s1, s2, n); }
00152 static inline C1 op (const C1* s1, const C2* s2, nat n,
00153 const format<C1>& fm1, const format<C2>& fm2) {
00154 return Vec::template vec_binary_big<Op,C1,C2> (s1, s2, n, fm1, fm2); } };
00155
00156
00157
00158
00159
00160
00161
00162 template<typename C>
00163 struct align_helper {
00164 static const nat m = Simd_size (C);
00165 static const nat log_m = int_bitsize_helper<nat, m>::value - 1;
00166 static const nat hi_mask_m = ((nat) -1) << log_m;
00167 static const nat lo_mask_m = m - 1;
00168 static const intptr_t len = m * sizeof (C);
00169 static const intptr_t log_len= int_bitsize_helper<nat, len>::value - 1;
00170 static const intptr_t lo_mask_len= len - 1;
00171 };
00172
00173
00174
00175
00176
00177 template<typename Z, typename V, typename W>
00178 struct implementation<vector_allocate,Z,vector_assume_aligned<V,W> >:
00179 public implementation<vector_defaults,Z>
00180 {
00181
00182 template<typename C> static inline nat
00183 vec_aligned_size (nat n) {
00184 VERIFY (align_helper<C>::m == (((nat) 1) << align_helper<C>::log_m),
00185 "simd size must be a power of two");
00186 return (n + align_helper<C>::m - 1) & align_helper<C>::hi_mask_m;
00187 }
00188
00189 template<typename C> static inline nat
00190 vec_floor_aligned_size (nat n) {
00191 return n & align_helper<C>::hi_mask_m;
00192 }
00193
00194 template<typename C> static inline bool
00195 vec_is_aligned_size (nat n) {
00196 return (n & align_helper<C>::lo_mask_m) == 0; }
00197
00198 template<typename C> static inline bool
00199 vec_is_aligned (const C* s) {
00200 return (align_helper<C>::len == 16) ?
00201 (((intptr_t) s) & align_helper<C>::lo_mask_len) == 0 : false; }
00202
00203 template<typename C> static inline intptr_t
00204 vec_ceil_shift (const C* s) {
00205 if (align_helper<C>::len == 16) {
00206 intptr_t r= (intptr_t) s & align_helper<C>::lo_mask_len;
00207 return (r != 0) ? align_helper<C>::len - r : r;
00208 }
00209 return 0; }
00210
00211 };
00212
00213 template<typename Z, typename V, typename W>
00214 struct implementation<vector_abstractions,Z,vector_assume_aligned<V,W> >:
00215 public implementation<vector_allocate,Z>
00216 {
00217 typedef implementation<vector_allocate,Z> Vec;
00218
00219 template<typename Op, typename T> static inline void
00220 vec_nullary (T* dest, nat n) {
00221 VERIFY (Vec::vec_is_aligned (dest), "address must be aligned");
00222 VERIFY (Vec::template vec_is_aligned_size<T> (n), "size must be aligned");
00223 vec_nullary_aligned_helper<V,W,Op,T>::op (dest, n); }
00224
00225 template<typename Op, typename T, typename C> static inline void
00226 vec_unary (T* dest, const C* s, nat n) {
00227 VERIFY (Vec::vec_is_aligned (dest), "address must be aligned");
00228 VERIFY (Vec::vec_is_aligned (s) , "address must be aligned");
00229 VERIFY (Vec::template vec_is_aligned_size<T> (n), "size must be aligned");
00230 VERIFY (Vec::template vec_is_aligned_size<C> (n), "size must be aligned");
00231 vec_unary_aligned_helper<V,W,Op,T,C>::op (dest, s, n); }
00232
00233 template<typename Op, typename T, typename C1, typename C2> static inline void
00234 vec_binary (T* dest, const C1* s1, const C2* s2, nat n) {
00235 VERIFY (Vec::vec_is_aligned (dest) , "address must be aligned");
00236 VERIFY (Vec::vec_is_aligned (s1) , "address must be aligned");
00237 VERIFY (Vec::vec_is_aligned (s2) , "address must be aligned");
00238 VERIFY (Vec::template vec_is_aligned_size<T> (n), "size must be aligned");
00239 VERIFY (Vec::template vec_is_aligned_size<C1>(n), "size must be aligned");
00240 VERIFY (Vec::template vec_is_aligned_size<C2>(n), "size must be aligned");
00241 vec_binary_aligned_helper<V,W,Op,T,C1,C2>::op (dest, s1, s2, n); }
00242
00243 template<typename Op, typename T, typename X> static inline void
00244 vec_unary_scalar (T* dest, const X& x, nat n) {
00245 VERIFY (Vec::vec_is_aligned (dest), "address must be aligned");
00246 VERIFY (Vec::template vec_is_aligned_size<T> (n), "size must be aligned");
00247 vec_unary_scalar_aligned_helper<V,W,Op,T,X>::op (dest, x, n); }
00248
00249 template<typename Op, typename T, typename C, typename X> static inline void
00250 vec_binary_scalar (T* dest, const C* s, const X& x, nat n) {
00251 VERIFY (Vec::vec_is_aligned (dest), "address must be aligned");
00252 VERIFY (Vec::vec_is_aligned (s) , "address must be aligned");
00253 VERIFY (Vec::template vec_is_aligned_size<T> (n), "size must be aligned");
00254 VERIFY (Vec::template vec_is_aligned_size<C> (n), "size must be aligned");
00255 vec_binary_scalar_aligned_helper<V,W,Op,T,C,X>::op (dest, s, x, n); }
00256
00257 template<typename Op, typename T, typename C1, typename C2, typename X>
00258 static inline void
00259 vec_ternary_scalar (T* dest, const C1* s1, const C2* s2, const X& x, nat n) {
00260 VERIFY (Vec::vec_is_aligned (dest), "address must be aligned");
00261 VERIFY (Vec::vec_is_aligned (s1) , "address must be aligned");
00262 VERIFY (Vec::vec_is_aligned (s2) , "address must be aligned");
00263 VERIFY (Vec::template vec_is_aligned_size<T> (n), "size must be aligned");
00264 VERIFY (Vec::template vec_is_aligned_size<C1> (n), "size must be aligned");
00265 VERIFY (Vec::template vec_is_aligned_size<C2> (n), "size must be aligned");
00266 vec_ternary_scalar_aligned_helper<V,W,Op,T,C1,C2,X>::op (dest, s1, s2, x, n); }
00267
00268 template<typename Op, typename T, typename C, typename X1, typename X2>
00269 static inline void
00270 vec_ternary_scalar_scalar (T* dest, const C* s,
00271 const X1& x1, const X2& x2, nat n) {
00272 VERIFY (Vec::vec_is_aligned (dest), "address must be aligned");
00273 VERIFY (Vec::vec_is_aligned (s) , "address must be aligned");
00274 VERIFY (Vec::template vec_is_aligned_size<T> (n), "size must be aligned");
00275 VERIFY (Vec::template vec_is_aligned_size<C> (n), "size must be aligned");
00276 vec_ternary_scalar_scalar_aligned_helper<V,W,Op,T,C,X1,X1>::op
00277 (dest, s, x1, x2, n); }
00278
00279 template<typename Op, typename C> static inline bool
00280 vec_unary_test (const C* s, nat n) {
00281 VERIFY (Vec::vec_is_aligned (s), "address must be aligned");
00282 VERIFY (Vec::template vec_is_aligned_size<C> (n), "size must be aligned");
00283 return vec_unary_test_aligned_helper<V,W,Op,C>::op (s, n); }
00284
00285 template<typename Op, typename C1, typename C2> static inline bool
00286 vec_binary_test (const C1* s1, const C2* s2, nat n) {
00287 VERIFY (Vec::vec_is_aligned (s1), "address must be aligned");
00288 VERIFY (Vec::vec_is_aligned (s2), "address must be aligned");
00289 VERIFY (Vec::template vec_is_aligned_size<C1> (n), "size must be aligned");
00290 VERIFY (Vec::template vec_is_aligned_size<C2> (n), "size must be aligned");
00291 return vec_binary_test_aligned_helper<V,W,Op,C1,C2>::op (s1, s2, n); }
00292
00293 template<typename Op, typename C, typename X> static inline bool
00294 vec_binary_test_scalar (const C* s, const X& x, nat n) {
00295 VERIFY (Vec::vec_is_aligned (s), "address must be aligned");
00296 VERIFY (Vec::template vec_is_aligned_size<C> (n), "size must be aligned");
00297 return vec_binary_test_scalar_aligned_helper<V,W,Op,C,X>::op (s, x, n); }
00298
00299 template<typename Op, typename C> static inline C
00300 vec_unary_big (const C* s, nat n) {
00301 VERIFY (Vec::vec_is_aligned (s), "address must be aligned");
00302 VERIFY (Vec::template vec_is_aligned_size<C> (n), "size must be aligned");
00303 return vec_unary_big_aligned_helper<V,W,Op,C>::op (s, n); }
00304
00305 template<typename Op, typename C> static inline C
00306 vec_unary_big (const C* s, nat n, const format<C>& fm) {
00307 VERIFY (Vec::vec_is_aligned (s), "address must be aligned");
00308 VERIFY (Vec::template vec_is_aligned_size<C> (n), "size must be aligned");
00309 return vec_unary_big_aligned_helper<V,W,Op,C>::op (s, n, fm); }
00310
00311 template<typename Op, typename C> static inline C
00312 vec_unary_big_dicho (const C* s, nat n) {
00313 return vec_unary_big<Op, C> (s, n); }
00314
00315 template<typename Op, typename C> static inline C
00316 vec_unary_big_dicho (const C* s, nat n, const format<C>& fm) {
00317 return vec_unary_big<Op, C> (s, n, fm); }
00318
00319 template<typename Op, typename C1, typename C2> static inline C1
00320 vec_binary_big (const C1* s1, const C2* s2, nat n) {
00321 VERIFY (Vec::vec_is_aligned (s1), "address must be aligned");
00322 VERIFY (Vec::vec_is_aligned (s2), "address must be aligned");
00323 VERIFY (Vec::template vec_is_aligned_size<C1> (n), "size must be aligned");
00324 VERIFY (Vec::template vec_is_aligned_size<C2> (n), "size must be aligned");
00325 return vec_binary_big_aligned_helper<V,W,Op,C1,C2>::op (s1, s2, n); }
00326
00327 template<typename Op, typename C1, typename C2> static inline C1
00328 vec_binary_big (const C1* s1, const C2* s2, nat n,
00329 const format<C1>& fm1, const format<C2>& fm2) {
00330 VERIFY (Vec::vec_is_aligned (s1), "address must be aligned");
00331 VERIFY (Vec::vec_is_aligned (s2), "address must be aligned");
00332 VERIFY (Vec::template vec_is_aligned_size<C1> (n), "size must be aligned");
00333 VERIFY (Vec::template vec_is_aligned_size<C2> (n), "size must be aligned");
00334 return
00335 vec_binary_big_aligned_helper<V,W,Op,C1,C2>::op (s1, s2, n, fm1, fm2); }
00336
00337 };
00338
00339
00340
00341
00342
00343 template<typename Z, typename V, typename W>
00344 struct implementation<vector_allocate,Z,vector_aligned<V,W> >:
00345 public implementation<vector_allocate,vector_assume_aligned<V,W> >
00346 {};
00347
00348 template<typename Z, typename V, typename W>
00349 struct implementation<vector_abstractions,Z,vector_aligned<V,W> >:
00350 public implementation<vector_abstractions,V>
00351 {
00352 typedef vector_assume_aligned<V,W> Aligned;
00353 typedef implementation<vector_abstractions,V> NVec;
00354 typedef implementation<vector_abstractions,Aligned> AVec;
00355
00356 template<typename C> static inline nat
00357 vec_aligned_size (nat n) {
00358 return AVec::template vec_aligned_size<C> (n);
00359 }
00360
00361 template<typename Op, typename T>
00362 static inline void
00363 vec_nullary (T* dest, nat n) {
00364 if (align_helper<T>::len == 16) {
00365 nat r= min ((nat) AVec::vec_ceil_shift (dest), n);
00366 NVec::template vec_nullary<Op,T> (dest, r);
00367 dest += r; n -= r;
00368 r= AVec::template vec_floor_aligned_size<T> (n);
00369 AVec::template vec_nullary<Op,T> (dest, r);
00370 dest += r; n -= r;
00371 }
00372 NVec::template vec_nullary<Op,T> (dest, n);
00373 }
00374
00375 template<typename Op, typename T, typename C> static inline void
00376 vec_unary (T* dest, const C* s, nat n) {
00377 if (align_helper<T>::len == 16 &&
00378 align_helper<C>::len == 16) {
00379 nat rd= AVec::vec_ceil_shift (dest);
00380 nat rs= AVec::vec_ceil_shift (s);
00381 if (rd == rs) {
00382 nat r= min (rd, n);
00383 NVec::template vec_unary<Op,T,C> (dest, s, r);
00384 dest += r; s += r; n -= r;
00385 rd= AVec::template vec_floor_aligned_size<T> (n);
00386 rs= AVec::template vec_floor_aligned_size<C> (n);
00387 r= min (rd, rs);
00388 AVec::template vec_unary<Op,T,C> (dest, s, r);
00389 dest += r; s += r; n -= r;
00390 }
00391 }
00392 NVec::template vec_unary<Op,T,C> (dest, s, n);
00393 }
00394
00395 template<typename Op, typename T, typename C1, typename C2> static inline void
00396 vec_binary (T* dest, const C1* s1, const C2* s2, nat n) {
00397 if (align_helper<T >::len == 16 &&
00398 align_helper<C1>::len == 16 &&
00399 align_helper<C2>::len == 16) {
00400 nat rd = AVec::vec_ceil_shift (dest);
00401 nat rs1= AVec::vec_ceil_shift (s1);
00402 nat rs2= AVec::vec_ceil_shift (s2);
00403 if (rd == rs1 && rd == rs2) {
00404 nat r= min (rd, n);
00405 NVec::template vec_binary<Op,T,C1,C2> (dest, s1, s2, r);
00406 dest += r; s1 += r; s2 += r; n -= r;
00407 rd = AVec::template vec_floor_aligned_size<T> (n);
00408 rs1= AVec::template vec_floor_aligned_size<C1> (n);
00409 rs2= AVec::template vec_floor_aligned_size<C2> (n);
00410 r= min (min (rd, rs1), rs2);
00411 AVec::template vec_binary<Op,T,C1,C2> (dest, s1, s2, r);
00412 dest += r; s1 += r; s2 += r; n -= r;
00413 }
00414 }
00415 NVec::template vec_binary<Op,T,C1,C2> (dest, s1, s2, n);
00416 }
00417
00418 template<typename Op, typename T, typename X> static inline void
00419 vec_unary_scalar (T* dest, const X& x, nat n) {
00420 if (align_helper<T>::len == 16) {
00421 nat r= min ((nat) AVec::vec_ceil_shift (dest), n);
00422 NVec::template vec_unary_scalar<Op,T,X> (dest, x, r);
00423 dest += r; n -= r;
00424 r= AVec::template vec_floor_aligned_size<T> (n);
00425 AVec::template vec_unary_scalar<Op,T,X> (dest, x, r);
00426 dest += r; n -= r;
00427 }
00428 NVec::template vec_unary_scalar<Op,T,X> (dest, x, n);
00429 }
00430
00431 template<typename Op, typename T, typename C, typename X> static inline void
00432 vec_binary_scalar (T* dest, const C* s, const X& x, nat n) {
00433 if (align_helper<T>::len == 16 &&
00434 align_helper<C>::len == 16) {
00435 nat rd= AVec::vec_ceil_shift (dest);
00436 nat rs= AVec::vec_ceil_shift (s);
00437 if (rd == rs) {
00438 nat r= min (rd, n);
00439 NVec::template vec_binary_scalar<Op,T,C,X> (dest, s, x, r);
00440 dest += r; s += r; n -= r;
00441 rd= AVec::template vec_floor_aligned_size<T> (n);
00442 rs= AVec::template vec_floor_aligned_size<C> (n);
00443 r= min (rd, rs);
00444 AVec::template vec_binary_scalar<Op,T,C,X> (dest, s, x, r);
00445 dest += r; s += r; n -= r;
00446 }
00447 }
00448 NVec::template vec_binary_scalar<Op,T,C,X> (dest, s, x, n);
00449 }
00450
00451 template<typename Op, typename T, typename C1, typename C2, typename X>
00452 static inline void
00453 vec_ternary_scalar (T* dest, const C1* s1, const C2* s2, const X& x, nat n) {
00454 if (align_helper<T >::len == 16 &&
00455 align_helper<C1>::len == 16 &&
00456 align_helper<C2>::len == 16) {
00457 nat rd = AVec::vec_ceil_shift (dest);
00458 nat rs1= AVec::vec_ceil_shift (s1);
00459 nat rs2= AVec::vec_ceil_shift (s2);
00460 if (rd == rs1 && rd == rs2) {
00461 nat r= min (rd, n);
00462 NVec::template vec_ternary_scalar<Op,T,C1,C2,X> (dest, s1, s2, x, r);
00463 dest += r; s1 += r; s2 += r; n -= r;
00464 rd = AVec::template vec_floor_aligned_size<T > (n);
00465 rs1= AVec::template vec_floor_aligned_size<C1> (n);
00466 rs2= AVec::template vec_floor_aligned_size<C2> (n);
00467 r= min (min (rd, rs1), rs2);
00468 AVec::template vec_ternary_scalar<Op,T,C1,C2,X> (dest, s1, s2, x, r);
00469 dest += r; s1 += r; s2 += r; n -= r;
00470 }
00471 }
00472 NVec::template vec_ternary_scalar<Op,T,C1,C2,X> (dest, s1, s2, x, n);
00473 }
00474
00475 template<typename Op, typename T, typename C, typename X1, typename X2>
00476 static inline void
00477 vec_ternary_scalar_scalar (T* dest, const C* s,
00478 const X1& x1, const X2& x2, nat n) {
00479 if (align_helper<T>::len == 16 &&
00480 align_helper<C>::len == 16) {
00481 nat rd= AVec::vec_ceil_shift (dest);
00482 nat rs= AVec::vec_ceil_shift (s);
00483 if (rd == rs) {
00484 nat r= min (rd, n);
00485 NVec::template vec_ternary_scalar_scalar<Op,T,C,X1,X2>
00486 (dest, s, x1, x2, r);
00487 dest += r; s += r; n -= r;
00488 rd= AVec::template vec_floor_aligned_size<T> (n);
00489 rs= AVec::template vec_floor_aligned_size<C> (n);
00490 r= min (rd, rs);
00491 AVec::template vec_ternary_scalar_scalar<Op,T,C,X1,X2>
00492 (dest, s, x1, x2, r);
00493 dest += r; s += r; n -= r;
00494 }
00495 }
00496 NVec::template vec_ternary_scalar_scalar<Op,T,C,X1,X2>
00497 (dest, s, x1, x2, n);
00498 }
00499
00500 template<typename Op, typename C> static inline bool
00501 vec_unary_test (const C* s, nat n) {
00502 if (align_helper<C>::len == 16) {
00503 nat r= min ((nat) AVec::vec_ceil_shift (s), n);
00504 if (! NVec::template vec_unary_test<Op,C> (s, r)) return false;
00505 s += r; n -= r;
00506 r= AVec::template vec_floor_aligned_size<C> (n);
00507 if (! AVec::template vec_unary_test<Op,C> (s, r)) return false;
00508 s += r; n -= r;
00509 }
00510 return NVec::template vec_unary_test<Op,C> (s, n);
00511 }
00512
00513 template<typename Op, typename C1, typename C2> static inline bool
00514 vec_binary_test (const C1* s1, const C2* s2, nat n) {
00515 if (align_helper<C1>::len == 16 &&
00516 align_helper<C2>::len == 16) {
00517 nat r1= AVec::vec_ceil_shift (s1);
00518 nat r2= AVec::vec_ceil_shift (s2);
00519 if (r1 == r2) {
00520 nat r= min (r1, r2);
00521 if (! NVec::template vec_binary_test<Op,C1,C2> (s1, s2, r))
00522 return false;
00523 s1 += r; s2 += r; n -= r;
00524 r1= AVec::template vec_floor_aligned_size<C1> (n);
00525 r2= AVec::template vec_floor_aligned_size<C2> (n);
00526 r= min (r1, r2);
00527 if (! AVec::template vec_binary_test<Op,C1,C2> (s1, s2, r))
00528 return false;
00529 s1 += r; s2 += r; n -= r;
00530 }
00531 }
00532 return NVec::template vec_binary_test<Op,C1,C2> (s1, s2, n);
00533 }
00534
00535 template<typename Op, typename C, typename X> static inline bool
00536 vec_binary_test_scalar (const C* s, const X& x, nat n) {
00537 if (align_helper<C>::len == 16) {
00538 nat r= min ((nat) AVec::vec_ceil_shift (s), n);
00539 if (! NVec::template vec_binary_test_scalar<Op,C,X> (s, x, r))
00540 return false;
00541 s += r; n -= r;
00542 r= AVec::template vec_floor_aligned_size<C> (n);
00543 if (! AVec::template vec_binary_test_scalar<Op,C,X> (s, x, r))
00544 return false;
00545 s += r; n -= r;
00546 }
00547 return NVec::template vec_binary_test_scalar<Op,C> (s, x, n);
00548 }
00549
00550 template<typename Op, typename C> static inline C
00551 vec_unary_big (const C* s, nat n) {
00552 if (align_helper<C>::len == 16) {
00553 nat r= min ((nat) AVec::vec_ceil_shift (s), n);
00554 C c= NVec::template vec_unary_big<Op,C> (s, r);
00555 s += r; n -= r;
00556 r= AVec::template vec_floor_aligned_size<C> (n);
00557 Op::set_op (c, AVec::template vec_unary_big<Op,C> (s, r));
00558 s += r; n -= r;
00559 Op::set_op (c, NVec::template vec_unary_big<Op,C> (s, n));
00560 return c;
00561 }
00562 return NVec::template vec_unary_big<Op,C> (s, n);
00563 }
00564
00565 template<typename Op, typename C> static inline C
00566 vec_unary_big (const C* s, nat n, const format<C>& fm) {
00567 if (align_helper<C>::len == 16) {
00568 nat r= min ((nat) AVec::vec_ceil_shift (s), n);
00569 C c= NVec::template vec_unary_big<Op,C> (s, r, fm);
00570 s += r; n -= r;
00571 r= AVec::template vec_floor_aligned_size<C> (n);
00572 Op::set_op (c, AVec::template vec_unary_big<Op,C> (s, r, fm));
00573 s += r; n -= r;
00574 Op::set_op (c, NVec::template vec_unary_big<Op,C> (s, n, fm));
00575 return c;
00576 }
00577 return NVec::template vec_unary_big<Op,C> (s, n, fm);
00578 }
00579
00580 template<typename Op, typename C> static inline C
00581 vec_unary_big_dicho (const C* s, nat n) {
00582 return NVec::template vec_unary_big<Op, C> (s, n);
00583 }
00584
00585 template<typename Op, typename C> static inline C
00586 vec_unary_big_dicho (const C* s, nat n, const format<C>& fm) {
00587 return NVec::template vec_unary_big<Op, C> (s, n, fm);
00588 }
00589
00590 template<typename Op, typename C1, typename C2> static inline C1
00591 vec_binary_big (const C1* s1, const C2* s2, nat n) {
00592 if (AVec::vec_is_aligned (s1) &&
00593 AVec::template vec_is_aligned_size<C1> (n) &&
00594 AVec::vec_is_aligned (s2) &&
00595 AVec::template vec_is_aligned_size<C2> (n))
00596 return AVec::template vec_binary_big<Op,C1,C2> (s1, s2, n);
00597 return NVec::template vec_binary_big<Op,C1,C2> (s1, s2, n);
00598 }
00599
00600 template<typename Op, typename C1, typename C2> static inline C1
00601 vec_binary_big (const C1* s1, const C2* s2, nat n,
00602 const format<C1>& fm1, const format<C2>& fm2) {
00603 if (AVec::vec_is_aligned (s1) &&
00604 AVec::template vec_is_aligned_size<C1> (n) &&
00605 AVec::vec_is_aligned (s2) &&
00606 AVec::template vec_is_aligned_size<C2> (n))
00607 return AVec::template vec_binary_big<Op,C1,C2> (s1, s2, n, fm1, fm2);
00608 return NVec::template vec_binary_big<Op,C1,C2> (s1, s2, n, fm1, fm2);
00609 }
00610
00611 };
00612
00613 }
00614 #endif // __MMX__VECTOR_ALIGNED__HPP