Blender V2.61 - r43446
|
00001 00004 /* 00005 * -- SuperLU routine (version 3.0) -- 00006 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00007 * and Lawrence Berkeley National Lab. 00008 * October 15, 2003 00009 * 00010 */ 00011 /* 00012 Copyright (c) 1994 by Xerox Corporation. All rights reserved. 00013 00014 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY 00015 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 00016 00017 Permission is hereby granted to use or copy this program for any 00018 purpose, provided the above notices are retained on all copies. 00019 Permission to modify the code and to distribute modified code is 00020 granted, provided the above notices are retained, and a notice that 00021 the code was modified is included with the above copyright notice. 00022 */ 00023 00024 #include <math.h> 00025 #include "ssp_defs.h" 00026 #include "util.h" 00027 00028 /* prototypes */ 00029 flops_t LUFactFlops(SuperLUStat_t *stat); 00030 flops_t LUSolveFlops(SuperLUStat_t *stat); 00031 float SpaSize(int n, int np, float sum_npw); 00032 float DenseSize(int n, float sum_nw); 00033 00034 /* 00035 * Global statistics variale 00036 */ 00037 00038 void superlu_abort_and_exit(char* msg) 00039 { 00040 fprintf(stderr, "%s", msg); 00041 exit (-1); 00042 } 00043 00044 /* 00045 * Set the default values for the options argument. 00046 */ 00047 void set_default_options(superlu_options_t *options) 00048 { 00049 options->Fact = DOFACT; 00050 options->Equil = YES; 00051 options->ColPerm = COLAMD; 00052 options->DiagPivotThresh = 1.0; 00053 options->Trans = NOTRANS; 00054 options->IterRefine = NOREFINE; 00055 options->SymmetricMode = NO; 00056 options->PivotGrowth = NO; 00057 options->ConditionNumber = NO; 00058 options->PrintStat = YES; 00059 } 00060 00061 /* Deallocate the structure pointing to the actual storage of the matrix. */ 00062 void 00063 Destroy_SuperMatrix_Store(SuperMatrix *A) 00064 { 00065 SUPERLU_FREE ( A->Store ); 00066 } 00067 00068 void 00069 Destroy_CompCol_Matrix(SuperMatrix *A) 00070 { 00071 SUPERLU_FREE( ((NCformat *)A->Store)->rowind ); 00072 SUPERLU_FREE( ((NCformat *)A->Store)->colptr ); 00073 SUPERLU_FREE( ((NCformat *)A->Store)->nzval ); 00074 SUPERLU_FREE( A->Store ); 00075 } 00076 00077 void 00078 Destroy_CompRow_Matrix(SuperMatrix *A) 00079 { 00080 SUPERLU_FREE( ((NRformat *)A->Store)->colind ); 00081 SUPERLU_FREE( ((NRformat *)A->Store)->rowptr ); 00082 SUPERLU_FREE( ((NRformat *)A->Store)->nzval ); 00083 SUPERLU_FREE( A->Store ); 00084 } 00085 00086 void 00087 Destroy_SuperNode_Matrix(SuperMatrix *A) 00088 { 00089 SUPERLU_FREE ( ((SCformat *)A->Store)->rowind ); 00090 SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr ); 00091 SUPERLU_FREE ( ((SCformat *)A->Store)->nzval ); 00092 SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr ); 00093 SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup ); 00094 SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col ); 00095 SUPERLU_FREE ( A->Store ); 00096 } 00097 00098 /* A is of type Stype==NCP */ 00099 void 00100 Destroy_CompCol_Permuted(SuperMatrix *A) 00101 { 00102 SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg ); 00103 SUPERLU_FREE ( ((NCPformat *)A->Store)->colend ); 00104 SUPERLU_FREE ( A->Store ); 00105 } 00106 00107 /* A is of type Stype==DN */ 00108 void 00109 Destroy_Dense_Matrix(SuperMatrix *A) 00110 { 00111 DNformat* Astore = A->Store; 00112 SUPERLU_FREE (Astore->nzval); 00113 SUPERLU_FREE ( A->Store ); 00114 } 00115 00116 /* 00117 * Reset repfnz[] for the current column 00118 */ 00119 void 00120 resetrep_col (const int nseg, const int *segrep, int *repfnz) 00121 { 00122 int i, irep; 00123 00124 for (i = 0; i < nseg; i++) { 00125 irep = segrep[i]; 00126 repfnz[irep] = EMPTY; 00127 } 00128 } 00129 00130 00131 /* 00132 * Count the total number of nonzeros in factors L and U, and in the 00133 * symmetrically reduced L. 00134 */ 00135 void 00136 countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu) 00137 { 00138 int nsuper, fsupc, i, j; 00139 int nnzL0, jlen, irep; 00140 int *xsup, *xlsub; 00141 00142 xsup = Glu->xsup; 00143 xlsub = Glu->xlsub; 00144 *nnzL = 0; 00145 *nnzU = (Glu->xusub)[n]; 00146 nnzL0 = 0; 00147 nsuper = (Glu->supno)[n]; 00148 00149 if ( n <= 0 ) return; 00150 00151 /* 00152 * For each supernode 00153 */ 00154 for (i = 0; i <= nsuper; i++) { 00155 fsupc = xsup[i]; 00156 jlen = xlsub[fsupc+1] - xlsub[fsupc]; 00157 00158 for (j = fsupc; j < xsup[i+1]; j++) { 00159 *nnzL += jlen; 00160 *nnzU += j - fsupc + 1; 00161 jlen--; 00162 } 00163 irep = xsup[i+1] - 1; 00164 nnzL0 += xprune[irep] - xlsub[irep]; 00165 } 00166 00167 /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/ 00168 } 00169 00170 00171 00172 /* 00173 * Fix up the data storage lsub for L-subscripts. It removes the subscript 00174 * sets for structural pruning, and applies permuation to the remaining 00175 * subscripts. 00176 */ 00177 void 00178 fixupL(const int n, const int *perm_r, GlobalLU_t *Glu) 00179 { 00180 register int nsuper, fsupc, nextl, i, j, k, jstrt; 00181 int *xsup, *lsub, *xlsub; 00182 00183 if ( n <= 1 ) return; 00184 00185 xsup = Glu->xsup; 00186 lsub = Glu->lsub; 00187 xlsub = Glu->xlsub; 00188 nextl = 0; 00189 nsuper = (Glu->supno)[n]; 00190 00191 /* 00192 * For each supernode ... 00193 */ 00194 for (i = 0; i <= nsuper; i++) { 00195 fsupc = xsup[i]; 00196 jstrt = xlsub[fsupc]; 00197 xlsub[fsupc] = nextl; 00198 for (j = jstrt; j < xlsub[fsupc+1]; j++) { 00199 lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */ 00200 nextl++; 00201 } 00202 for (k = fsupc+1; k < xsup[i+1]; k++) 00203 xlsub[k] = nextl; /* Other columns in supernode i */ 00204 00205 } 00206 00207 xlsub[n] = nextl; 00208 } 00209 00210 00211 /* 00212 * Diagnostic print of segment info after panel_dfs(). 00213 */ 00214 void print_panel_seg(int n, int w, int jcol, int nseg, 00215 int *segrep, int *repfnz) 00216 { 00217 int j, k; 00218 00219 for (j = jcol; j < jcol+w; j++) { 00220 printf("\tcol %d:\n", j); 00221 for (k = 0; k < nseg; k++) 00222 printf("\t\tseg %d, segrep %d, repfnz %d\n", k, 00223 segrep[k], repfnz[(j-jcol)*n + segrep[k]]); 00224 } 00225 00226 } 00227 00228 00229 void 00230 StatInit(SuperLUStat_t *stat) 00231 { 00232 register int i, w, panel_size, relax; 00233 00234 panel_size = sp_ienv(1); 00235 relax = sp_ienv(2); 00236 w = SUPERLU_MAX(panel_size, relax); 00237 stat->panel_histo = intCalloc(w+1); 00238 stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double)); 00239 if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime"); 00240 stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t)); 00241 if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops"); 00242 for (i = 0; i < NPHASES; ++i) { 00243 stat->utime[i] = 0.; 00244 stat->ops[i] = 0.; 00245 } 00246 } 00247 00248 00249 void 00250 StatPrint(SuperLUStat_t *stat) 00251 { 00252 double *utime; 00253 flops_t *ops; 00254 00255 utime = stat->utime; 00256 ops = stat->ops; 00257 printf("Factor time = %8.2f\n", utime[FACT]); 00258 if ( utime[FACT] != 0.0 ) 00259 printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT], 00260 ops[FACT]*1e-6/utime[FACT]); 00261 00262 printf("Solve time = %8.2f\n", utime[SOLVE]); 00263 if ( utime[SOLVE] != 0.0 ) 00264 printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE], 00265 ops[SOLVE]*1e-6/utime[SOLVE]); 00266 00267 } 00268 00269 00270 void 00271 StatFree(SuperLUStat_t *stat) 00272 { 00273 SUPERLU_FREE(stat->panel_histo); 00274 SUPERLU_FREE(stat->utime); 00275 SUPERLU_FREE(stat->ops); 00276 } 00277 00278 00279 flops_t 00280 LUFactFlops(SuperLUStat_t *stat) 00281 { 00282 return (stat->ops[FACT]); 00283 } 00284 00285 flops_t 00286 LUSolveFlops(SuperLUStat_t *stat) 00287 { 00288 return (stat->ops[SOLVE]); 00289 } 00290 00291 00292 00293 00294 00295 /* 00296 * Fills an integer array with a given value. 00297 */ 00298 void ifill(int *a, int alen, int ival) 00299 { 00300 register int i; 00301 for (i = 0; i < alen; i++) a[i] = ival; 00302 } 00303 00304 00305 00306 /* 00307 * Get the statistics of the supernodes 00308 */ 00309 #define NBUCKS 10 00310 static int max_sup_size; 00311 00312 void super_stats(int nsuper, int *xsup) 00313 { 00314 register int nsup1 = 0; 00315 int i, isize, whichb, bl, bh; 00316 int bucket[NBUCKS]; 00317 00318 max_sup_size = 0; 00319 00320 for (i = 0; i <= nsuper; i++) { 00321 isize = xsup[i+1] - xsup[i]; 00322 if ( isize == 1 ) nsup1++; 00323 if ( max_sup_size < isize ) max_sup_size = isize; 00324 } 00325 00326 printf(" Supernode statistics:\n\tno of super = %d\n", nsuper+1); 00327 printf("\tmax supernode size = %d\n", max_sup_size); 00328 printf("\tno of size 1 supernodes = %d\n", nsup1); 00329 00330 /* Histogram of the supernode sizes */ 00331 ifill (bucket, NBUCKS, 0); 00332 00333 for (i = 0; i <= nsuper; i++) { 00334 isize = xsup[i+1] - xsup[i]; 00335 whichb = (float) isize / max_sup_size * NBUCKS; 00336 if (whichb >= NBUCKS) whichb = NBUCKS - 1; 00337 bucket[whichb]++; 00338 } 00339 00340 printf("\tHistogram of supernode sizes:\n"); 00341 for (i = 0; i < NBUCKS; i++) { 00342 bl = (float) i * max_sup_size / NBUCKS; 00343 bh = (float) (i+1) * max_sup_size / NBUCKS; 00344 printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]); 00345 } 00346 00347 } 00348 00349 00350 float SpaSize(int n, int np, float sum_npw) 00351 { 00352 return (sum_npw*8 + np*8 + n*4)/1024.; 00353 } 00354 00355 float DenseSize(int n, float sum_nw) 00356 { 00357 return (sum_nw*8 + n*8)/1024.;; 00358 } 00359 00360 00361 00362 /* 00363 * Check whether repfnz[] == EMPTY after reset. 00364 */ 00365 void check_repfnz(int n, int w, int jcol, int *repfnz) 00366 { 00367 int jj, k; 00368 00369 for (jj = jcol; jj < jcol+w; jj++) 00370 for (k = 0; k < n; k++) 00371 if ( repfnz[(jj-jcol)*n + k] != EMPTY ) { 00372 fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj, 00373 k, repfnz[(jj-jcol)*n + k]); 00374 ABORT("check_repfnz"); 00375 } 00376 } 00377 00378 00379 /* Print a summary of the testing results. */ 00380 void 00381 PrintSumm(char *type, int nfail, int nrun, int nerrs) 00382 { 00383 if ( nfail > 0 ) 00384 printf("%3s driver: %d out of %d tests failed to pass the threshold\n", 00385 type, nfail, nrun); 00386 else 00387 printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun); 00388 00389 if ( nerrs > 0 ) 00390 printf("%6d error messages recorded\n", nerrs); 00391 } 00392 00393 00394 int print_int_vec(char *what, int n, int *vec) 00395 { 00396 int i; 00397 printf("%s\n", what); 00398 for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]); 00399 return 0; 00400 }