Blender V2.61 - r43446
|
00001 00005 /* 00006 * -- SuperLU routine (version 3.0) -- 00007 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00008 * and Lawrence Berkeley National Lab. 00009 * October 15, 2003 00010 * 00011 */ 00012 /* 00013 Copyright (c) 1994 by Xerox Corporation. All rights reserved. 00014 00015 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY 00016 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 00017 00018 Permission is hereby granted to use or copy this program for any 00019 purpose, provided the above notices are retained on all copies. 00020 Permission to modify the code and to distribute modified code is 00021 granted, provided the above notices are retained, and a notice that 00022 the code was modified is included with the above copyright notice. 00023 */ 00024 00025 #include <math.h> 00026 #include "ssp_defs.h" 00027 00028 /* prototypes */ 00029 void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu); 00030 void scheck_tempv(int n, float *tempv); 00031 void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,float , float , 00032 float *, float *, char *, SuperLUStat_t *); 00033 int print_float_vec(char *what, int n, float *vec); 00034 /* ********** */ 00035 00036 void 00037 sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 00038 float *nzval, int *rowind, int *colptr, 00039 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00040 { 00041 NCformat *Astore; 00042 00043 A->Stype = stype; 00044 A->Dtype = dtype; 00045 A->Mtype = mtype; 00046 A->nrow = m; 00047 A->ncol = n; 00048 A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); 00049 if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); 00050 Astore = A->Store; 00051 Astore->nnz = nnz; 00052 Astore->nzval = nzval; 00053 Astore->rowind = rowind; 00054 Astore->colptr = colptr; 00055 } 00056 00057 void 00058 sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 00059 float *nzval, int *colind, int *rowptr, 00060 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00061 { 00062 NRformat *Astore; 00063 00064 A->Stype = stype; 00065 A->Dtype = dtype; 00066 A->Mtype = mtype; 00067 A->nrow = m; 00068 A->ncol = n; 00069 A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) ); 00070 if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); 00071 Astore = A->Store; 00072 Astore->nnz = nnz; 00073 Astore->nzval = nzval; 00074 Astore->colind = colind; 00075 Astore->rowptr = rowptr; 00076 } 00077 00078 /* Copy matrix A into matrix B. */ 00079 void 00080 sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) 00081 { 00082 NCformat *Astore, *Bstore; 00083 int ncol, nnz, i; 00084 00085 B->Stype = A->Stype; 00086 B->Dtype = A->Dtype; 00087 B->Mtype = A->Mtype; 00088 B->nrow = A->nrow;; 00089 B->ncol = ncol = A->ncol; 00090 Astore = (NCformat *) A->Store; 00091 Bstore = (NCformat *) B->Store; 00092 Bstore->nnz = nnz = Astore->nnz; 00093 for (i = 0; i < nnz; ++i) 00094 ((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i]; 00095 for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i]; 00096 for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i]; 00097 } 00098 00099 00100 void 00101 sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx, 00102 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00103 { 00104 DNformat *Xstore; 00105 00106 X->Stype = stype; 00107 X->Dtype = dtype; 00108 X->Mtype = mtype; 00109 X->nrow = m; 00110 X->ncol = n; 00111 X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); 00112 if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store"); 00113 Xstore = (DNformat *) X->Store; 00114 Xstore->lda = ldx; 00115 Xstore->nzval = (float *) x; 00116 } 00117 00118 void 00119 sCopy_Dense_Matrix(int M, int N, float *X, int ldx, 00120 float *Y, int ldy) 00121 { 00122 /* 00123 * 00124 * Purpose 00125 * ======= 00126 * 00127 * Copies a two-dimensional matrix X to another matrix Y. 00128 */ 00129 int i, j; 00130 00131 for (j = 0; j < N; ++j) 00132 for (i = 0; i < M; ++i) 00133 Y[i + j*ldy] = X[i + j*ldx]; 00134 } 00135 00136 void 00137 sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 00138 float *nzval, int *nzval_colptr, int *rowind, 00139 int *rowind_colptr, int *col_to_sup, int *sup_to_col, 00140 Stype_t stype, Dtype_t dtype, Mtype_t mtype) 00141 { 00142 SCformat *Lstore; 00143 00144 L->Stype = stype; 00145 L->Dtype = dtype; 00146 L->Mtype = mtype; 00147 L->nrow = m; 00148 L->ncol = n; 00149 L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ); 00150 if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store"); 00151 Lstore = L->Store; 00152 Lstore->nnz = nnz; 00153 Lstore->nsuper = col_to_sup[n]; 00154 Lstore->nzval = nzval; 00155 Lstore->nzval_colptr = nzval_colptr; 00156 Lstore->rowind = rowind; 00157 Lstore->rowind_colptr = rowind_colptr; 00158 Lstore->col_to_sup = col_to_sup; 00159 Lstore->sup_to_col = sup_to_col; 00160 00161 } 00162 00163 00164 /* 00165 * Convert a row compressed storage into a column compressed storage. 00166 */ 00167 void 00168 sCompRow_to_CompCol(int m, int n, int nnz, 00169 float *a, int *colind, int *rowptr, 00170 float **at, int **rowind, int **colptr) 00171 { 00172 register int i, j, col, relpos; 00173 int *marker; 00174 00175 /* Allocate storage for another copy of the matrix. */ 00176 *at = (float *) floatMalloc(nnz); 00177 *rowind = (int *) intMalloc(nnz); 00178 *colptr = (int *) intMalloc(n+1); 00179 marker = (int *) intCalloc(n); 00180 00181 /* Get counts of each column of A, and set up column pointers */ 00182 for (i = 0; i < m; ++i) 00183 for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]]; 00184 (*colptr)[0] = 0; 00185 for (j = 0; j < n; ++j) { 00186 (*colptr)[j+1] = (*colptr)[j] + marker[j]; 00187 marker[j] = (*colptr)[j]; 00188 } 00189 00190 /* Transfer the matrix into the compressed column storage. */ 00191 for (i = 0; i < m; ++i) { 00192 for (j = rowptr[i]; j < rowptr[i+1]; ++j) { 00193 col = colind[j]; 00194 relpos = marker[col]; 00195 (*rowind)[relpos] = i; 00196 (*at)[relpos] = a[j]; 00197 ++marker[col]; 00198 } 00199 } 00200 00201 SUPERLU_FREE(marker); 00202 } 00203 00204 00205 void 00206 sPrint_CompCol_Matrix(char *what, SuperMatrix *A) 00207 { 00208 NCformat *Astore; 00209 register int i,n; 00210 float *dp; 00211 00212 printf("\nCompCol matrix %s:\n", what); 00213 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); 00214 n = A->ncol; 00215 Astore = (NCformat *) A->Store; 00216 dp = (float *) Astore->nzval; 00217 printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz); 00218 printf("nzval: "); 00219 for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]); 00220 printf("\nrowind: "); 00221 for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]); 00222 printf("\ncolptr: "); 00223 for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]); 00224 printf("\n"); 00225 fflush(stdout); 00226 } 00227 00228 void 00229 sPrint_SuperNode_Matrix(char *what, SuperMatrix *A) 00230 { 00231 SCformat *Astore; 00232 register int i, j, k, c, d, n, nsup; 00233 float *dp; 00234 int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr; 00235 00236 printf("\nSuperNode matrix %s:\n", what); 00237 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); 00238 n = A->ncol; 00239 Astore = (SCformat *) A->Store; 00240 dp = (float *) Astore->nzval; 00241 col_to_sup = Astore->col_to_sup; 00242 sup_to_col = Astore->sup_to_col; 00243 rowind_colptr = Astore->rowind_colptr; 00244 rowind = Astore->rowind; 00245 printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 00246 A->nrow,A->ncol,Astore->nnz,Astore->nsuper); 00247 printf("nzval:\n"); 00248 for (k = 0; k <= Astore->nsuper; ++k) { 00249 c = sup_to_col[k]; 00250 nsup = sup_to_col[k+1] - c; 00251 for (j = c; j < c + nsup; ++j) { 00252 d = Astore->nzval_colptr[j]; 00253 for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) { 00254 printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]); 00255 } 00256 } 00257 } 00258 #if 0 00259 for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]); 00260 #endif 00261 printf("\nnzval_colptr: "); 00262 for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]); 00263 printf("\nrowind: "); 00264 for (i = 0; i < Astore->rowind_colptr[n]; ++i) 00265 printf("%d ", Astore->rowind[i]); 00266 printf("\nrowind_colptr: "); 00267 for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]); 00268 printf("\ncol_to_sup: "); 00269 for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]); 00270 printf("\nsup_to_col: "); 00271 for (i = 0; i <= Astore->nsuper+1; ++i) 00272 printf("%d ", sup_to_col[i]); 00273 printf("\n"); 00274 fflush(stdout); 00275 } 00276 00277 void 00278 sPrint_Dense_Matrix(char *what, SuperMatrix *A) 00279 { 00280 DNformat *Astore; 00281 register int i; 00282 float *dp; 00283 00284 printf("\nDense matrix %s:\n", what); 00285 printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); 00286 Astore = (DNformat *) A->Store; 00287 dp = (float *) Astore->nzval; 00288 printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); 00289 printf("\nnzval: "); 00290 for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); 00291 printf("\n"); 00292 fflush(stdout); 00293 } 00294 00295 /* 00296 * Diagnostic print of column "jcol" in the U/L factor. 00297 */ 00298 void 00299 sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) 00300 { 00301 int i, k, fsupc; 00302 int *xsup, *supno; 00303 int *xlsub, *lsub; 00304 float *lusup; 00305 int *xlusup; 00306 float *ucol; 00307 int *usub, *xusub; 00308 00309 xsup = Glu->xsup; 00310 supno = Glu->supno; 00311 lsub = Glu->lsub; 00312 xlsub = Glu->xlsub; 00313 lusup = Glu->lusup; 00314 xlusup = Glu->xlusup; 00315 ucol = Glu->ucol; 00316 usub = Glu->usub; 00317 xusub = Glu->xusub; 00318 00319 printf("%s", msg); 00320 printf("col %d: pivrow %d, supno %d, xprune %d\n", 00321 jcol, pivrow, supno[jcol], xprune[jcol]); 00322 00323 printf("\tU-col:\n"); 00324 for (i = xusub[jcol]; i < xusub[jcol+1]; i++) 00325 printf("\t%d%10.4f\n", usub[i], ucol[i]); 00326 printf("\tL-col in rectangular snode:\n"); 00327 fsupc = xsup[supno[jcol]]; /* first col of the snode */ 00328 i = xlsub[fsupc]; 00329 k = xlusup[jcol]; 00330 while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) { 00331 printf("\t%d\t%10.4f\n", lsub[i], lusup[k]); 00332 i++; k++; 00333 } 00334 fflush(stdout); 00335 } 00336 00337 00338 /* 00339 * Check whether tempv[] == 0. This should be true before and after 00340 * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 00341 */ 00342 void scheck_tempv(int n, float *tempv) 00343 { 00344 int i; 00345 00346 for (i = 0; i < n; i++) { 00347 if (tempv[i] != 0.0) 00348 { 00349 fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]); 00350 ABORT("scheck_tempv"); 00351 } 00352 } 00353 } 00354 00355 00356 void 00357 sGenXtrue(int n, int nrhs, float *x, int ldx) 00358 { 00359 int i, j; 00360 for (j = 0; j < nrhs; ++j) 00361 for (i = 0; i < n; ++i) { 00362 x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/ 00363 } 00364 } 00365 00366 /* 00367 * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's 00368 */ 00369 void 00370 sFillRHS(trans_t trans, int nrhs, float *x, int ldx, 00371 SuperMatrix *A, SuperMatrix *B) 00372 { 00373 DNformat *Bstore; 00374 float *rhs; 00375 float one = 1.0; 00376 float zero = 0.0; 00377 int ldc; 00378 char transc[1]; 00379 00380 Bstore = B->Store; 00381 rhs = Bstore->nzval; 00382 ldc = Bstore->lda; 00383 00384 if ( trans == NOTRANS ) *(unsigned char *)transc = 'N'; 00385 else *(unsigned char *)transc = 'T'; 00386 00387 sp_sgemm(transc, nrhs, one, A, 00388 x, ldx, zero, rhs, ldc); 00389 00390 } 00391 00392 /* 00393 * Fills a float precision array with a given value. 00394 */ 00395 void 00396 sfill(float *a, int alen, float dval) 00397 { 00398 register int i; 00399 for (i = 0; i < alen; i++) a[i] = dval; 00400 } 00401 00402 00403 00404 /* 00405 * Check the inf-norm of the error vector 00406 */ 00407 void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue) 00408 { 00409 DNformat *Xstore; 00410 float err, xnorm; 00411 float *Xmat, *soln_work; 00412 int i, j; 00413 00414 Xstore = X->Store; 00415 Xmat = Xstore->nzval; 00416 00417 for (j = 0; j < nrhs; j++) { 00418 soln_work = &Xmat[j*Xstore->lda]; 00419 err = xnorm = 0.0; 00420 for (i = 0; i < X->nrow; i++) { 00421 err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i])); 00422 xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i])); 00423 } 00424 err = err / xnorm; 00425 printf("||X - Xtrue||/||X|| = %e\n", err); 00426 } 00427 } 00428 00429 00430 00431 /* Print performance of the code. */ 00432 void 00433 sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, 00434 float rpg, float rcond, float *ferr, 00435 float *berr, char *equed, SuperLUStat_t *stat) 00436 { 00437 SCformat *Lstore; 00438 NCformat *Ustore; 00439 double *utime; 00440 flops_t *ops; 00441 00442 utime = stat->utime; 00443 ops = stat->ops; 00444 00445 if ( utime[FACT] != 0. ) 00446 printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT], 00447 ops[FACT]*1e-6/utime[FACT]); 00448 printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]); 00449 if ( utime[SOLVE] != 0. ) 00450 printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE], 00451 ops[SOLVE]*1e-6/utime[SOLVE]); 00452 00453 Lstore = (SCformat *) L->Store; 00454 Ustore = (NCformat *) U->Store; 00455 printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz); 00456 printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); 00457 printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); 00458 00459 printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", 00460 mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, 00461 mem_usage->expansions); 00462 00463 printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); 00464 printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", 00465 utime[FACT], ops[FACT]*1e-6/utime[FACT], 00466 utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE], 00467 utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]); 00468 00469 printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n"); 00470 printf("NUM:\t%e\t%e\t%e\t%e\t%s\n", 00471 rpg, rcond, ferr[0], berr[0], equed); 00472 00473 } 00474 00475 00476 00477 00478 int print_float_vec(char *what, int n, float *vec) 00479 { 00480 int i; 00481 printf("%s: n %d\n", what, n); 00482 for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]); 00483 return 0; 00484 } 00485