00001 #ifndef ALREADY_dpivmacrev
00002 #define ALREADY_dpivmacrev
00003
00004 template<>
00005 int
00006 dpivotL(
00007 const int jcol,
00008 const macrev_Z u,
00009 int *usepr,
00010 int *perm_r,
00011 int *iperm_r,
00012 int *iperm_c,
00013 int *pivrow,
00014 GlobalLU_t<macrev_Z> *Glu
00015 )
00016 {
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 int fsupc;
00039 int nsupc;
00040 int nsupr;
00041 int lptr;
00042 int pivptr, old_pivptr, diag, diagind;
00043 macrev_Z pivmax, rtemp, thresh;
00044 macrev_Z temp;
00045 macrev_Z *lu_sup_ptr;
00046 macrev_Z *lu_col_ptr;
00047 int *lsub_ptr;
00048 int isub, icol, k, itemp;
00049 int *lsub, *xlsub;
00050 macrev_Z *lusup;
00051 int *xlusup;
00052 extern SuperLUStat_t SuperLUStat;
00053 flops_t *ops = SuperLUStat.ops;
00054
00055
00056 lsub = Glu->lsub;
00057 xlsub = Glu->xlsub;
00058 lusup = Glu->lusup;
00059 xlusup = Glu->xlusup;
00060 fsupc = (Glu->xsup)[(Glu->supno)[jcol]];
00061 nsupc = jcol - fsupc;
00062 lptr = xlsub[fsupc];
00063 nsupr = xlsub[fsupc+1] - lptr;
00064 lu_sup_ptr = &lusup[xlusup[fsupc]];
00065 lu_col_ptr = &lusup[xlusup[jcol]];
00066 lsub_ptr = &lsub[lptr];
00067
00068 #ifdef DEBUG
00069 if ( jcol == MIN_COL ) {
00070 printf("Before cdiv: col %d\n", jcol);
00071 for (k = nsupc; k < nsupr; k++)
00072 printf(" lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
00073 }
00074 #endif
00075
00076
00077
00078 if ( *usepr ) *pivrow = iperm_r[jcol];
00079 diagind = iperm_c[jcol];
00080 pivmax =(macrev_Z)0;
00081 pivptr = nsupc;
00082 diag = EMPTY;
00083 old_pivptr = nsupc;
00084 for (isub = nsupc; isub < nsupr; ++isub) {
00085 rtemp = lu_col_ptr[isub];
00086
00087 if ( rtemp != 0 ) {
00088 pivmax = rtemp;
00089 pivptr = isub;
00090 break;
00091 }
00092 if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
00093 if ( lsub_ptr[isub] == diagind ) diag = isub;
00094 }
00095
00096
00097 if ( pivmax ==0) {
00098 *pivrow = lsub_ptr[nsupc];
00099
00100 perm_r[*pivrow] = jcol;
00101 *usepr = 0;
00102 return (jcol+1);
00103 }
00104
00105 thresh = u * pivmax;
00106
00107
00108 if ( *usepr ) {
00109
00110 rtemp = lu_col_ptr[old_pivptr];
00111
00112 if ( rtemp != (macrev_Z)0 )
00113 pivptr = old_pivptr;
00114 else
00115 *usepr = 0;
00116 }
00117 if ( *usepr == 0 ) {
00118
00119 if ( diag >= 0 ) {
00120 rtemp = lu_col_ptr[diag];
00121 if ( rtemp != (macrev_Z)0 ) pivptr = diag;
00122 }
00123 *pivrow = lsub_ptr[pivptr];
00124 }
00125
00126
00127
00128 perm_r[*pivrow] = jcol;
00129
00130
00131 if ( pivptr != nsupc ) {
00132 itemp = lsub_ptr[pivptr];
00133 lsub_ptr[pivptr] = lsub_ptr[nsupc];
00134 lsub_ptr[nsupc] = itemp;
00135
00136
00137
00138
00139 for (icol = 0; icol <= nsupc; icol++) {
00140 itemp = pivptr + icol * nsupr;
00141 temp = lu_sup_ptr[itemp];
00142 lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
00143 lu_sup_ptr[nsupc + icol*nsupr] = temp;
00144 }
00145 }
00146
00147
00148 ops[FACT] += nsupr - nsupc;
00149 temp = (macrev_Z)1 / lu_col_ptr[nsupc];
00150 for (k = nsupc+1; k < nsupr; k++)
00151 lu_col_ptr[k] *= temp;
00152
00153 return 0;
00154 }
00155
00156 void
00157 dSetRWork(int m, int panel_size, macrev_Z *dworkptr,
00158 macrev_Z **dense, macrev_Z **tempv)
00159 {
00160 macrev_Z zero(0);
00161 int i;
00162 int maxsuper = sp_ienv(3),
00163 rowblk = sp_ienv(4);
00164 *dense = dworkptr;
00165 *tempv = *dense + panel_size*m;
00166 for(i=0;i<m*panel_size;i++)
00167 mpz_init((*dense)[i].rep);
00168 for(i=0;i<NUM_TEMPV(m,panel_size,maxsuper,rowblk);i++)
00169 mpz_init((*tempv)[i].rep);
00170
00171
00172 }
00173
00174 void dLUWorkFree(int m,int panel_size, int *iwork, macrev_Z *dwork
00175 , GlobalLU_t<macrev_Z > *Glu)
00176 {
00177 int i;
00178 int maxsuper = sp_ienv(3),
00179 rowblk = sp_ienv(4);
00180 if ( Glu->MemModel == SYSTEM ) {
00181 SUPERLU_FREE (iwork);
00182 for(i=0;i<m*panel_size+NUM_TEMPV(m,panel_size,maxsuper,rowblk);i++)
00183 mpz_clear((dwork[i].rep));
00184 SUPERLU_FREE (dwork);
00185 } else {
00186 stack.used -= (stack.size - stack.top2);
00187 stack.top2 = stack.size;
00188
00189 }
00190
00191 SUPERLU_FREE (expanders);
00192 }
00193
00194 void* dexpand (
00195 int *prev_len,
00196 MemType type,
00197 int len_to_copy,
00198 int keep_prev,
00199
00200 GlobalLU_t<macrev_Z > *Glu
00201 )
00202 {
00203 double EXPAND = 1.5;
00204 double alpha;
00205 void *new_mem, *old_mem;
00206 int new_len, tries, lword, extra, bytes_to_copy;
00207
00208 alpha = EXPAND;
00209
00210 if ( no_expand == 0 || keep_prev )
00211 new_len = *prev_len;
00212 else {
00213 new_len = (int)(alpha * *prev_len);
00214 }
00215
00216 if ( type == LSUB || type == USUB ) lword = sizeof(int);
00217 else lword = sizeof(macrev_Z);
00218
00219 if ( Glu->MemModel == SYSTEM ) {
00220 new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
00221 if(new_mem && lword==sizeof(macrev_Z))
00222 for(extra=0;extra<new_len;extra++)
00223 mpz_init(((macrev_Z*)new_mem)[extra].rep);
00224
00225 if ( no_expand != 0 ) {
00226 tries = 0;
00227 if ( keep_prev ) {
00228 if ( !new_mem ) return (NULL);
00229 } else {
00230 while ( !new_mem )
00231 {
00232 if ( ++tries > 10 ) return (NULL);
00233 alpha = Reduce(alpha);
00234 new_len = (int)(alpha * *prev_len);
00235 new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
00236
00237 }
00238 if(tries && lword==sizeof(macrev_Z))
00239 for(int extra=0;extra<new_len;extra++)
00240 mpz_init(((macrev_Z*)new_mem)[extra].rep);
00241 }
00242 if ( type == LSUB || type == USUB ) {
00243 copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
00244 } else {
00245 copy_mem_double<macrev_Z >(len_to_copy,
00246 expanders[type].mem, new_mem);
00247
00248 for(unsigned int i=0;i<len_to_copy;i++)
00249 mpz_clear(((macrev_Z*)expanders[type].mem)[i].rep);
00250 }
00251
00252 SUPERLU_FREE (expanders[type].mem);
00253 }
00254 expanders[type].mem = (void *) new_mem;
00255
00256 } else {
00257 if ( no_expand == 0 ) {
00258 new_mem = duser_malloc(new_len * lword, HEAD);
00259 if ( NotDoubleAlign(new_mem) &&
00260 (type == LUSUP || type == UCOL) ) {
00261 old_mem = new_mem;
00262 new_mem = (void *)DoubleAlign(new_mem);
00263 extra = (char*)new_mem - (char*)old_mem;
00264 #ifdef DEBUG
00265 printf("expand(): not aligned, extra %d\n", extra);
00266 #endif
00267 stack.top1 += extra;
00268 stack.used += extra;
00269 }
00270 expanders[type].mem = (void *) new_mem;
00271 }
00272 else {
00273 tries = 0;
00274 extra = (new_len - *prev_len) * lword;
00275 if ( keep_prev ) {
00276 if ( StackFull(extra) ) return (NULL);
00277 } else {
00278 while ( StackFull(extra) ) {
00279 if ( ++tries > 10 ) return (NULL);
00280 alpha = Reduce(alpha);
00281 new_len = (int)(alpha * *prev_len);
00282 extra = (new_len - *prev_len) * lword;
00283 }
00284 }
00285
00286 if ( type != USUB ) {
00287 new_mem = (void*)((char*)expanders[type + 1].mem + extra);
00288 bytes_to_copy = (char*)stack.array + stack.top1
00289 - (char*)expanders[type + 1].mem;
00290 user_bcopy((char*)expanders[type+1].mem,
00291 (char*)new_mem, bytes_to_copy);
00292
00293 if ( type < USUB ) {
00294 Glu->usub = (int*)
00295 (expanders[USUB].mem =
00296 (void*)((char*)expanders[USUB].mem + extra));
00297 }
00298 if ( type < LSUB ) {
00299 Glu->lsub =(int*)(expanders[LSUB].mem =
00300 (void*)((char*)expanders[LSUB].mem + extra));
00301 }
00302 if ( type < UCOL ) {
00303 Glu->ucol =(macrev_Z*)( expanders[UCOL].mem =
00304 (void*)((char*)expanders[UCOL].mem + extra));
00305 }
00306 stack.top1 += extra;
00307 stack.used += extra;
00308 if ( type == UCOL ) {
00309 stack.top1 += extra;
00310 stack.used += extra;
00311 }
00312
00313 }
00314
00315 }
00316 }
00317
00318 expanders[type].size = new_len;
00319 *prev_len = new_len;
00320 if ( no_expand ) ++no_expand;
00321
00322 return (void *) expanders[type].mem;
00323
00324 }
00325
00326 template<>
00327 harewell<macrev_Z >::harewell(const int nrow,const int ncol)
00328 {
00329
00330 this->nrow=nrow;
00331 this->ncol=ncol;
00332 this->rowind=(int*)malloc(sizeof(int)*(this->ncol));
00333 this->colptr=(int*)malloc(sizeof(int)*(1+this->ncol));
00334 this->nzval=MAC_REV_MALLOC<macrev_Z >(sizeof(macrev_Z)*(this->ncol));
00335 this->nnz=this->ncol;
00336 for(int i=0;i<this->ncol;i++)
00337 {
00338 this->rowind[i]=i;
00339 this->colptr[i]=i;
00340
00341 }
00342 this->colptr[this->ncol]=this->nnz;
00343 }
00344
00345 template<>
00346 inline void harewell<macrev_Z >::destroystore()
00347 {
00348 free(rowind);
00349 rowind=NULL;
00350 free(colptr);
00351 colptr=NULL;
00352
00353 MAC_REV_FREE<macrev_Z>(nzval,nnz*sizeof(macrev_Z));
00354 nzval=NULL;
00355 nnz=0;
00356 }
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391 void remiseformatNC(SuperMatrix<macrev_Z > &M,const vector<macrev_Z > &nzval
00392 ,const vector<int> &rowind,const vector<int> &colptr
00393 ,int coeff)
00394 {
00395 ((NCformat *)M.Store)->nnz=coeff;
00396 destroystore(M);
00397 M.Stype=NC;
00398 M.Store=(void *)malloc(sizeof(NCformat));
00399 NCformat *Mstore=(NCformat *)M.Store;
00400 Mstore->nnz=nzval.size();
00401 Mstore->nzval=(void *)malloc(sizeof(macrev_Z)*(nzval.size()));
00402 Mstore->rowind=(int*)malloc(sizeof(int)*(nzval.size()));
00403 Mstore->colptr=(int*)malloc(sizeof(int)*(colptr.size()));
00404 for(unsigned int i=0;i<nzval.size();++i)
00405 {
00406 Mstore->rowind[i]=rowind[i];
00407 mpz_init(((macrev_Z*)(Mstore->nzval))[i].rep);
00408 ((macrev_Z*)(Mstore->nzval))[i]=nzval[i];
00409 }
00410 for(unsigned int i=0;i<colptr.size();++i)
00411 Mstore->colptr[i]=colptr[i];
00412 return;
00413 }
00414
00415
00416 #endif //ALREADY_dpivmacrev