00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef __MMX__POLYNOMIAL_UNROLLED__HPP
00014 #define __MMX__POLYNOMIAL_UNROLLED__HPP
00015 #include <algebramix/polynomial_naive.hpp>
00016 #include <algebramix/polynomial.hpp>
00017 #include <algebramix/vector_unrolled.hpp>
00018 #include <numerix/simd.hpp>
00019
00020 #define TMPL template<typename C>
00021 #define TMPLK template<typename C, typename K>
00022
00023 namespace mmx {
00024
00025
00026
00027
00028
00029 template<typename V, nat sz>
00030 struct polynomial_unrolled: public V {
00031 typedef vector_unrolled<sz> Vec;
00032 typedef typename V::Naive Naive;
00033 typedef polynomial_unrolled<typename V::Positive,sz> Positive;
00034 typedef polynomial_unrolled<typename V::No_simd,sz> No_simd;
00035 typedef polynomial_unrolled<typename V::No_thread,sz> No_thread;
00036 typedef polynomial_unrolled<typename V::No_scaled,sz> No_scaled;
00037 };
00038
00039 template<typename F, typename V, typename W, nat sz>
00040 struct implementation<F,V,polynomial_unrolled<W,sz> >:
00041 public implementation<F,V,W> {};
00042
00043
00044
00045
00046
00047 template<typename V, typename C, typename K>
00048 struct polynomial_mul_helper {
00049 typedef implementation<vector_linear,V> Vec;
00050 typedef implementation<polynomial_linear,V> Pol;
00051
00052 static void
00053 op (C* dest, const C* s1, const K* s2, nat n1, nat n2) {
00054 nat l = aligned_size<K,V> (n2);
00055 K* rev_s2 = mmx_new<K> (l);
00056 Pol::reverse (rev_s2, s2, n2);
00057 for (nat i = 1; i <= n2; i++, dest++)
00058 *dest = Vec::inn_prod (s1 , rev_s2 + n2 - i, min (i, n1));
00059 for (nat i = 1; i < n1; i++, dest++)
00060 *dest = Vec::inn_prod (s1 + i, rev_s2 , min (n2, n1 - i));
00061 mmx_delete (rev_s2, l);
00062 }
00063 };
00064
00065 template<typename V, typename C>
00066 struct polynomial_tmul_helper {
00067 typedef implementation<vector_linear,V> Vec;
00068
00069 static void
00070 op (C* dest, const C* s1, const C* s2, nat n1, nat n2) {
00071 for (nat i = 0; i < n2; i++, dest++, s2++)
00072 *dest = Vec::inn_prod (s1, s2, n1);
00073 }
00074 };
00075
00076 template<typename V, typename W, nat m>
00077 struct implementation<polynomial_multiply,V,polynomial_unrolled<W,m> >:
00078 public implementation<polynomial_linear,V>
00079 {
00080 typedef implementation<vector_linear,V> Vec;
00081 typedef implementation<polynomial_multiply,W> Fallback;
00082
00083 TMPLK static void
00084 mul (C* dest, const C* s1, const K* s2, nat n1, nat n2) {
00085 Vec::clear (dest, n1+n2-1);
00086 polynomial_mul_helper<W,C,K>::op (dest, s1, s2, n1, n2);
00087 }
00088
00089 TMPL static inline void
00090 square (C* dest, const C* s, nat n) {
00091 mul (dest, s, s, n, n);
00092 }
00093
00094 TMPL static void
00095 tmul (C* dest, const C* s1, const C* s2, nat n1, nat n2) {
00096 Vec::clear (dest, n2);
00097 polynomial_tmul_helper<W,C>::op (dest, s1, s2, n1, n2);
00098 }
00099
00100 };
00101
00102
00103
00104
00105
00106 template<typename V, typename W, nat m>
00107 struct implementation<polynomial_graeffe,V,polynomial_unrolled<W,m> >:
00108 public implementation<polynomial_multiply,V>
00109 {
00110 typedef implementation<vector_linear,V> Vec;
00111 typedef implementation<polynomial_multiply,V> Pol;
00112
00113 TMPL static void
00114 graeffe (C* dest, const C* s, nat n) {
00115 nat l1= (n+1) >> 1, l2= n >> 1;
00116
00117 nat l= aligned_size<C,V> (n + 1);
00118 C* c1= mmx_new<C> (l);
00119 C* c2= mmx_new<C> (l);
00120 if (n>0) dest[n-1]= C(0);
00121 Vec::half_copy (c1, s , l1);
00122 Pol::square (dest, c1, l1);
00123 Vec::half_copy (c1, s+1, l2);
00124 Pol::square (c2 , c1, l2);
00125 Pol::copy (c1 + 1, c2, l2 << 1);
00126 c1[0] = C(0);
00127 Pol::sub (dest, c1, l2 << 1);
00128 mmx_delete<C> (c1, l);
00129 mmx_delete<C> (c2, l);
00130 }
00131
00132 };
00133
00134
00135
00136
00137
00138 template<typename V, typename C>
00139 struct polynomial_quo_rem_helper {
00140 typedef implementation<vector_linear,V> Vec;
00141
00142 static void
00143 op (C* dest, C* s1, const C* s2, nat n1, nat n2) {
00144
00145
00146
00147 while (n1 >= n2 && n1 != 0) {
00148 int d= n1-n2;
00149 C q= quo (s1[n1-1], s2[n2-1]);
00150 dest[d]= q;
00151 Vec::template vec_binary_scalar<mul_add_op,C,C,C> (s1 + d, s2, -q, n2);
00152 n1--;
00153 }
00154 }
00155 };
00156
00157 template<typename V, typename W, nat m>
00158 struct implementation<polynomial_divide,V,polynomial_unrolled<W,m> >:
00159 public implementation<polynomial_multiply,V>
00160 {
00161
00162 TMPL static void
00163 quo_rem (C* dest, C* s1, const C* s2, nat n1, nat n2) {
00164 polynomial_quo_rem_helper<W,C>::op (dest, s1, s2, n1, n2);
00165 }
00166
00167 };
00168
00169
00170
00171
00172
00173 template<typename V, typename C>
00174 struct polynomial_evaluate_helper {
00175 typedef implementation<polynomial_evaluate,polynomial_naive> Pol;
00176
00177 static inline C
00178 op (const C* p, const C& x, nat l) {
00179 return Pol::evaluate (p, x, l);
00180 }
00181 };
00182 #ifdef ALGEBRAMIX_ENABLE_UNSTABLE
00183 template<typename V, typename W, nat m>
00184 struct implementation<polynomial_evaluate,V,polynomial_unrolled<W,m> >:
00185 public implementation<polynomial_divide,V>
00186 {
00187
00188 TMPL static void
00189 annulator (C* d, const C* s, nat n) {
00190 (void) d; (void) s; (void) n;
00191 ERROR ("expand not yet implemented");
00192 }
00193
00194 TMPL static C
00195 evaluate (const C* p, const C& x, nat l) {
00196 return polynomial_evaluate_helper<W,C>::op (p, x, l);
00197 }
00198
00199 TMPL static void
00200 evaluate (C* v, const C* p, const C* x, nat l, nat n) {
00201
00202 for (nat j=0; j<n; j++)
00203 v[j]= evaluate (p, x[j], l);
00204 }
00205
00206 TMPL static void
00207 expand (C** v, const C* p, const C* x, const nat* nu, nat n, nat k) {
00208 (void) v; (void) p; (void) x; (void) nu; (void) n; (void) k;
00209 ERROR ("expand not yet implemented");
00210 }
00211
00212 };
00213 #endif
00214 }
00215
00216 #undef TMPL
00217 #undef TMPLK
00218 #endif //__MMX__POLYNOMIAL_UNROLLED__HPP