Blender V2.61 - r43446
|
00001 00005 /* 00006 * -- SuperLU routine (version 3.0) -- 00007 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00008 * and Lawrence Berkeley National Lab. 00009 * October 15, 2003 00010 * 00011 */ 00012 /* 00013 Copyright (c) 1994 by Xerox Corporation. All rights reserved. 00014 00015 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY 00016 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 00017 00018 Permission is hereby granted to use or copy this program for any 00019 purpose, provided the above notices are retained on all copies. 00020 Permission to modify the code and to distribute modified code is 00021 granted, provided the above notices are retained, and a notice that 00022 the code was modified is included with the above copyright notice. 00023 */ 00024 00025 #include "ssp_defs.h" 00026 00027 void 00028 sgstrf (superlu_options_t *options, SuperMatrix *A, 00029 int relax, int panel_size, int *etree, void *work, int lwork, 00030 int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, 00031 SuperLUStat_t *stat, int *info) 00032 { 00033 /* 00034 * Purpose 00035 * ======= 00036 * 00037 * SGSTRF computes an LU factorization of a general sparse m-by-n 00038 * matrix A using partial pivoting with row interchanges. 00039 * The factorization has the form 00040 * Pr * A = L * U 00041 * where Pr is a row permutation matrix, L is lower triangular with unit 00042 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 00043 * triangular (upper trapezoidal if A->nrow < A->ncol). 00044 * 00045 * See supermatrix.h for the definition of 'SuperMatrix' structure. 00046 * 00047 * Arguments 00048 * ========= 00049 * 00050 * options (input) superlu_options_t* 00051 * The structure defines the input parameters to control 00052 * how the LU decomposition will be performed. 00053 * 00054 * A (input) SuperMatrix* 00055 * Original matrix A, permuted by columns, of dimension 00056 * (A->nrow, A->ncol). The type of A can be: 00057 * Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE. 00058 * 00059 * drop_tol (input) float (NOT IMPLEMENTED) 00060 * Drop tolerance parameter. At step j of the Gaussian elimination, 00061 * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij. 00062 * 0 <= drop_tol <= 1. The default value of drop_tol is 0. 00063 * 00064 * relax (input) int 00065 * To control degree of relaxing supernodes. If the number 00066 * of nodes (columns) in a subtree of the elimination tree is less 00067 * than relax, this subtree is considered as one supernode, 00068 * regardless of the row structures of those columns. 00069 * 00070 * panel_size (input) int 00071 * A panel consists of at most panel_size consecutive columns. 00072 * 00073 * etree (input) int*, dimension (A->ncol) 00074 * Elimination tree of A'*A. 00075 * Note: etree is a vector of parent pointers for a forest whose 00076 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. 00077 * On input, the columns of A should be permuted so that the 00078 * etree is in a certain postorder. 00079 * 00080 * work (input/output) void*, size (lwork) (in bytes) 00081 * User-supplied work space and space for the output data structures. 00082 * Not referenced if lwork = 0; 00083 * 00084 * lwork (input) int 00085 * Specifies the size of work array in bytes. 00086 * = 0: allocate space internally by system malloc; 00087 * > 0: use user-supplied work array of length lwork in bytes, 00088 * returns error if space runs out. 00089 * = -1: the routine guesses the amount of space needed without 00090 * performing the factorization, and returns it in 00091 * *info; no other side effects. 00092 * 00093 * perm_c (input) int*, dimension (A->ncol) 00094 * Column permutation vector, which defines the 00095 * permutation matrix Pc; perm_c[i] = j means column i of A is 00096 * in position j in A*Pc. 00097 * When searching for diagonal, perm_c[*] is applied to the 00098 * row subscripts of A, so that diagonal threshold pivoting 00099 * can find the diagonal of A, rather than that of A*Pc. 00100 * 00101 * perm_r (input/output) int*, dimension (A->nrow) 00102 * Row permutation vector which defines the permutation matrix Pr, 00103 * perm_r[i] = j means row i of A is in position j in Pr*A. 00104 * If options->Fact = SamePattern_SameRowPerm, the pivoting routine 00105 * will try to use the input perm_r, unless a certain threshold 00106 * criterion is violated. In that case, perm_r is overwritten by 00107 * a new permutation determined by partial pivoting or diagonal 00108 * threshold pivoting. 00109 * Otherwise, perm_r is output argument; 00110 * 00111 * L (output) SuperMatrix* 00112 * The factor L from the factorization Pr*A=L*U; use compressed row 00113 * subscripts storage for supernodes, i.e., L has type: 00114 * Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. 00115 * 00116 * U (output) SuperMatrix* 00117 * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise 00118 * storage scheme, i.e., U has types: Stype = SLU_NC, 00119 * Dtype = SLU_S, Mtype = SLU_TRU. 00120 * 00121 * stat (output) SuperLUStat_t* 00122 * Record the statistics on runtime and floating-point operation count. 00123 * See util.h for the definition of 'SuperLUStat_t'. 00124 * 00125 * info (output) int* 00126 * = 0: successful exit 00127 * < 0: if info = -i, the i-th argument had an illegal value 00128 * > 0: if info = i, and i is 00129 * <= A->ncol: U(i,i) is exactly zero. The factorization has 00130 * been completed, but the factor U is exactly singular, 00131 * and division by zero will occur if it is used to solve a 00132 * system of equations. 00133 * > A->ncol: number of bytes allocated when memory allocation 00134 * failure occurred, plus A->ncol. If lwork = -1, it is 00135 * the estimated amount of space needed, plus A->ncol. 00136 * 00137 * ====================================================================== 00138 * 00139 * Local Working Arrays: 00140 * ====================== 00141 * m = number of rows in the matrix 00142 * n = number of columns in the matrix 00143 * 00144 * xprune[0:n-1]: xprune[*] points to locations in subscript 00145 * vector lsub[*]. For column i, xprune[i] denotes the point where 00146 * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 00147 * to be traversed for symbolic factorization. 00148 * 00149 * marker[0:3*m-1]: marker[i] = j means that node i has been 00150 * reached when working on column j. 00151 * Storage: relative to original row subscripts 00152 * NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 00153 * see spanel_dfs.c; marker2 is used for inner-factorization, 00154 * see scolumn_dfs.c. 00155 * 00156 * parent[0:m-1]: parent vector used during dfs 00157 * Storage: relative to new row subscripts 00158 * 00159 * xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 00160 * unexplored neighbor of i in lsub[*] 00161 * 00162 * segrep[0:nseg-1]: contains the list of supernodal representatives 00163 * in topological order of the dfs. A supernode representative is the 00164 * last column of a supernode. 00165 * The maximum size of segrep[] is n. 00166 * 00167 * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 00168 * supernodal representative r, repfnz[r] is the location of the first 00169 * nonzero in this segment. It is also used during the dfs: repfnz[r]>0 00170 * indicates the supernode r has been explored. 00171 * NOTE: There are W of them, each used for one column of a panel. 00172 * 00173 * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 00174 * the panel diagonal. These are filled in during spanel_dfs(), and are 00175 * used later in the inner LU factorization within the panel. 00176 * panel_lsub[]/dense[] pair forms the SPA data structure. 00177 * NOTE: There are W of them. 00178 * 00179 * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; 00180 * NOTE: there are W of them. 00181 * 00182 * tempv[0:*]: real temporary used for dense numeric kernels; 00183 * The size of this array is defined by NUM_TEMPV() in ssp_defs.h. 00184 * 00185 */ 00186 /* Local working arrays */ 00187 NCPformat *Astore; 00188 int *iperm_r = NULL; /* inverse of perm_r; 00189 used when options->Fact == SamePattern_SameRowPerm */ 00190 int *iperm_c; /* inverse of perm_c */ 00191 int *iwork; 00192 float *swork; 00193 int *segrep, *repfnz, *parent, *xplore; 00194 int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ 00195 int *xprune; 00196 int *marker; 00197 float *dense, *tempv; 00198 int *relax_end; 00199 float *a; 00200 int *asub; 00201 int *xa_begin, *xa_end; 00202 int *xsup, *supno; 00203 int *xlsub, *xlusup, *xusub; 00204 int nzlumax; 00205 static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ 00206 00207 /* Local scalars */ 00208 fact_t fact = options->Fact; 00209 double diag_pivot_thresh = options->DiagPivotThresh; 00210 int pivrow; /* pivotal row number in the original matrix A */ 00211 int nseg1; /* no of segments in U-column above panel row jcol */ 00212 int nseg; /* no of segments in each U-column */ 00213 register int jcol; 00214 register int kcol; /* end column of a relaxed snode */ 00215 register int icol; 00216 register int i, k, jj, new_next, iinfo; 00217 int m, n, min_mn, jsupno, fsupc, nextlu, nextu; 00218 int w_def; /* upper bound on panel width */ 00219 int usepr, iperm_r_allocated = 0; 00220 int nnzL, nnzU; 00221 int *panel_histo = stat->panel_histo; 00222 flops_t *ops = stat->ops; 00223 00224 iinfo = 0; 00225 m = A->nrow; 00226 n = A->ncol; 00227 min_mn = SUPERLU_MIN(m, n); 00228 Astore = A->Store; 00229 a = Astore->nzval; 00230 asub = Astore->rowind; 00231 xa_begin = Astore->colbeg; 00232 xa_end = Astore->colend; 00233 00234 /* Allocate storage common to the factor routines */ 00235 *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, 00236 panel_size, L, U, &Glu, &iwork, &swork); 00237 if ( *info ) return; 00238 00239 xsup = Glu.xsup; 00240 supno = Glu.supno; 00241 xlsub = Glu.xlsub; 00242 xlusup = Glu.xlusup; 00243 xusub = Glu.xusub; 00244 00245 SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, 00246 &repfnz, &panel_lsub, &xprune, &marker); 00247 sSetRWork(m, panel_size, swork, &dense, &tempv); 00248 00249 usepr = (fact == SamePattern_SameRowPerm); 00250 if ( usepr ) { 00251 /* Compute the inverse of perm_r */ 00252 iperm_r = (int *) intMalloc(m); 00253 for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; 00254 iperm_r_allocated = 1; 00255 } 00256 iperm_c = (int *) intMalloc(n); 00257 for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; 00258 00259 /* Identify relaxed snodes */ 00260 relax_end = (int *) intMalloc(n); 00261 if ( options->SymmetricMode == YES ) { 00262 heap_relax_snode(n, etree, relax, marker, relax_end); 00263 } else { 00264 relax_snode(n, etree, relax, marker, relax_end); 00265 } 00266 00267 ifill (perm_r, m, EMPTY); 00268 ifill (marker, m * NO_MARKER, EMPTY); 00269 supno[0] = -1; 00270 xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; 00271 w_def = panel_size; 00272 00273 /* 00274 * Work on one "panel" at a time. A panel is one of the following: 00275 * (a) a relaxed supernode at the bottom of the etree, or 00276 * (b) panel_size contiguous columns, defined by the user 00277 */ 00278 for (jcol = 0; jcol < min_mn; ) { 00279 00280 if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ 00281 kcol = relax_end[jcol]; /* end of the relaxed snode */ 00282 panel_histo[kcol-jcol+1]++; 00283 00284 /* -------------------------------------- 00285 * Factorize the relaxed supernode(jcol:kcol) 00286 * -------------------------------------- */ 00287 /* Determine the union of the row structure of the snode */ 00288 if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end, 00289 xprune, marker, &Glu)) != 0 ) { 00290 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); 00291 SUPERLU_FREE (iperm_c); 00292 SUPERLU_FREE (relax_end); 00293 return; 00294 } 00295 00296 nextu = xusub[jcol]; 00297 nextlu = xlusup[jcol]; 00298 jsupno = supno[jcol]; 00299 fsupc = xsup[jsupno]; 00300 new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); 00301 nzlumax = Glu.nzlumax; 00302 while ( new_next > nzlumax ) { 00303 if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) ) { 00304 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); 00305 SUPERLU_FREE (iperm_c); 00306 SUPERLU_FREE (relax_end); 00307 return; 00308 } 00309 } 00310 00311 for (icol = jcol; icol<= kcol; icol++) { 00312 xusub[icol+1] = nextu; 00313 00314 /* Scatter into SPA dense[*] */ 00315 for (k = xa_begin[icol]; k < xa_end[icol]; k++) 00316 dense[asub[k]] = a[k]; 00317 00318 /* Numeric update within the snode */ 00319 ssnode_bmod(icol, fsupc, dense, tempv, &Glu, stat); 00320 00321 if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r, 00322 iperm_r, iperm_c, &pivrow, &Glu, stat)) ) 00323 if ( iinfo == 0 ) iinfo = *info; 00324 00325 #ifdef DEBUG 00326 sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu); 00327 #endif 00328 00329 } 00330 00331 jcol = icol; 00332 00333 } else { /* Work on one panel of panel_size columns */ 00334 00335 /* Adjust panel_size so that a panel won't overlap with the next 00336 * relaxed snode. 00337 */ 00338 panel_size = w_def; 00339 for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 00340 if ( relax_end[k] != EMPTY ) { 00341 panel_size = k - jcol; 00342 break; 00343 } 00344 if ( k == min_mn ) panel_size = min_mn - jcol; 00345 panel_histo[panel_size]++; 00346 00347 /* symbolic factor on a panel of columns */ 00348 spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, 00349 dense, panel_lsub, segrep, repfnz, xprune, 00350 marker, parent, xplore, &Glu); 00351 00352 /* numeric sup-panel updates in topological order */ 00353 spanel_bmod(m, panel_size, jcol, nseg1, dense, 00354 tempv, segrep, repfnz, &Glu, stat); 00355 00356 /* Sparse LU within the panel, and below panel diagonal */ 00357 for ( jj = jcol; jj < jcol + panel_size; jj++) { 00358 k = (jj - jcol) * m; /* column index for w-wide arrays */ 00359 00360 nseg = nseg1; /* Begin after all the panel segments */ 00361 00362 if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k], 00363 segrep, &repfnz[k], xprune, marker, 00364 parent, xplore, &Glu)) != 0) { 00365 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); 00366 SUPERLU_FREE (iperm_c); 00367 SUPERLU_FREE (relax_end); 00368 return; 00369 } 00370 00371 /* Numeric updates */ 00372 if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k], 00373 tempv, &segrep[nseg1], &repfnz[k], 00374 jcol, &Glu, stat)) != 0) { 00375 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); 00376 SUPERLU_FREE (iperm_c); 00377 SUPERLU_FREE (relax_end); 00378 return; 00379 } 00380 00381 /* Copy the U-segments to ucol[*] */ 00382 if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k], 00383 perm_r, &dense[k], &Glu)) != 0) { 00384 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); 00385 SUPERLU_FREE (iperm_c); 00386 SUPERLU_FREE (relax_end); 00387 return; 00388 } 00389 00390 if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r, 00391 iperm_r, iperm_c, &pivrow, &Glu, stat)) ) 00392 if ( iinfo == 0 ) iinfo = *info; 00393 00394 /* Prune columns (0:jj-1) using column jj */ 00395 spruneL(jj, perm_r, pivrow, nseg, segrep, 00396 &repfnz[k], xprune, &Glu); 00397 00398 /* Reset repfnz[] for this column */ 00399 resetrep_col (nseg, segrep, &repfnz[k]); 00400 00401 #ifdef DEBUG 00402 sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu); 00403 #endif 00404 00405 } 00406 00407 jcol += panel_size; /* Move to the next panel */ 00408 00409 } /* else */ 00410 00411 } /* for */ 00412 00413 *info = iinfo; 00414 00415 if ( m > n ) { 00416 k = 0; 00417 for (i = 0; i < m; ++i) 00418 if ( perm_r[i] == EMPTY ) { 00419 perm_r[i] = n + k; 00420 ++k; 00421 } 00422 } 00423 00424 countnz(min_mn, xprune, &nnzL, &nnzU, &Glu); 00425 fixupL(min_mn, perm_r, &Glu); 00426 00427 sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */ 00428 00429 if ( fact == SamePattern_SameRowPerm ) { 00430 /* L and U structures may have changed due to possibly different 00431 pivoting, even though the storage is available. 00432 There could also be memory expansions, so the array locations 00433 may have changed, */ 00434 ((SCformat *)L->Store)->nnz = nnzL; 00435 ((SCformat *)L->Store)->nsuper = Glu.supno[n]; 00436 ((SCformat *)L->Store)->nzval = Glu.lusup; 00437 ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; 00438 ((SCformat *)L->Store)->rowind = Glu.lsub; 00439 ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; 00440 ((NCformat *)U->Store)->nnz = nnzU; 00441 ((NCformat *)U->Store)->nzval = Glu.ucol; 00442 ((NCformat *)U->Store)->rowind = Glu.usub; 00443 ((NCformat *)U->Store)->colptr = Glu.xusub; 00444 } else { 00445 sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, 00446 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, 00447 Glu.xsup, SLU_SC, SLU_S, SLU_TRLU); 00448 sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 00449 Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU); 00450 } 00451 00452 ops[FACT] += ops[TRSV] + ops[GEMV]; 00453 00454 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); 00455 SUPERLU_FREE (iperm_c); 00456 SUPERLU_FREE (relax_end); 00457 }