Blender V2.61 - r43446
|
00001 00005 /* 00006 * -- SuperLU routine (version 3.0) -- 00007 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00008 * and Lawrence Berkeley National Lab. 00009 * October 15, 2003 00010 * 00011 */ 00012 /* 00013 * File name: ssp_blas2.c 00014 * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. 00015 */ 00016 00017 #include "ssp_defs.h" 00018 00019 /* 00020 * Function prototypes 00021 */ 00022 void susolve(int, int, float*, float*); 00023 void slsolve(int, int, float*, float*); 00024 void smatvec(int, int, int, float*, float*, float*); 00025 int strsv_(char*, char*, char*, int*, float*, int*, float*, int*); 00026 00027 int 00028 sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 00029 SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info) 00030 { 00031 /* 00032 * Purpose 00033 * ======= 00034 * 00035 * sp_strsv() solves one of the systems of equations 00036 * A*x = b, or A'*x = b, 00037 * where b and x are n element vectors and A is a sparse unit , or 00038 * non-unit, upper or lower triangular matrix. 00039 * No test for singularity or near-singularity is included in this 00040 * routine. Such tests must be performed before calling this routine. 00041 * 00042 * Parameters 00043 * ========== 00044 * 00045 * uplo - (input) char* 00046 * On entry, uplo specifies whether the matrix is an upper or 00047 * lower triangular matrix as follows: 00048 * uplo = 'U' or 'u' A is an upper triangular matrix. 00049 * uplo = 'L' or 'l' A is a lower triangular matrix. 00050 * 00051 * trans - (input) char* 00052 * On entry, trans specifies the equations to be solved as 00053 * follows: 00054 * trans = 'N' or 'n' A*x = b. 00055 * trans = 'T' or 't' A'*x = b. 00056 * trans = 'C' or 'c' A'*x = b. 00057 * 00058 * diag - (input) char* 00059 * On entry, diag specifies whether or not A is unit 00060 * triangular as follows: 00061 * diag = 'U' or 'u' A is assumed to be unit triangular. 00062 * diag = 'N' or 'n' A is not assumed to be unit 00063 * triangular. 00064 * 00065 * L - (input) SuperMatrix* 00066 * The factor L from the factorization Pr*A*Pc=L*U. Use 00067 * compressed row subscripts storage for supernodes, 00068 * i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU. 00069 * 00070 * U - (input) SuperMatrix* 00071 * The factor U from the factorization Pr*A*Pc=L*U. 00072 * U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU. 00073 * 00074 * x - (input/output) float* 00075 * Before entry, the incremented array X must contain the n 00076 * element right-hand side vector b. On exit, X is overwritten 00077 * with the solution vector x. 00078 * 00079 * info - (output) int* 00080 * If *info = -i, the i-th argument had an illegal value. 00081 * 00082 */ 00083 #ifdef _CRAY 00084 _fcd ftcs1 = _cptofcd("L", strlen("L")), 00085 ftcs2 = _cptofcd("N", strlen("N")), 00086 ftcs3 = _cptofcd("U", strlen("U")); 00087 #endif 00088 SCformat *Lstore; 00089 NCformat *Ustore; 00090 float *Lval, *Uval; 00091 int incx = 1; 00092 int nrow; 00093 int fsupc, nsupr, nsupc, luptr, istart, irow; 00094 int i, k, iptr, jcol; 00095 float *work; 00096 flops_t solve_ops; 00097 00098 /* Test the input parameters */ 00099 *info = 0; 00100 if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; 00101 else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 00102 !lsame_(trans, "C")) *info = -2; 00103 else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; 00104 else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; 00105 else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; 00106 if ( *info ) { 00107 i = -(*info); 00108 xerbla_("sp_strsv", &i); 00109 return 0; 00110 } 00111 00112 Lstore = L->Store; 00113 Lval = Lstore->nzval; 00114 Ustore = U->Store; 00115 Uval = Ustore->nzval; 00116 solve_ops = 0; 00117 00118 if ( !(work = floatCalloc(L->nrow)) ) 00119 ABORT("Malloc fails for work in sp_strsv()."); 00120 00121 if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */ 00122 00123 if ( lsame_(uplo, "L") ) { 00124 /* Form x := inv(L)*x */ 00125 if ( L->nrow == 0 ) { 00126 SUPERLU_FREE(work); 00127 return 0; /* Quick return */ 00128 } 00129 00130 for (k = 0; k <= Lstore->nsuper; k++) { 00131 fsupc = L_FST_SUPC(k); 00132 istart = L_SUB_START(fsupc); 00133 nsupr = L_SUB_START(fsupc+1) - istart; 00134 nsupc = L_FST_SUPC(k+1) - fsupc; 00135 luptr = L_NZ_START(fsupc); 00136 nrow = nsupr - nsupc; 00137 00138 solve_ops += nsupc * (nsupc - 1); 00139 solve_ops += 2 * nrow * nsupc; 00140 00141 if ( nsupc == 1 ) { 00142 for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) { 00143 irow = L_SUB(iptr); 00144 ++luptr; 00145 x[irow] -= x[fsupc] * Lval[luptr]; 00146 } 00147 } else { 00148 #ifdef USE_VENDOR_BLAS 00149 #ifdef _CRAY 00150 STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, 00151 &x[fsupc], &incx); 00152 00153 SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 00154 &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); 00155 #else 00156 strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr, 00157 &x[fsupc], &incx); 00158 00159 sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 00160 &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); 00161 #endif 00162 #else 00163 slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]); 00164 00165 smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc], 00166 &x[fsupc], &work[0] ); 00167 #endif 00168 00169 iptr = istart + nsupc; 00170 for (i = 0; i < nrow; ++i, ++iptr) { 00171 irow = L_SUB(iptr); 00172 x[irow] -= work[i]; /* Scatter */ 00173 work[i] = 0.0; 00174 00175 } 00176 } 00177 } /* for k ... */ 00178 00179 } else { 00180 /* Form x := inv(U)*x */ 00181 00182 if ( U->nrow == 0 ) return 0; /* Quick return */ 00183 00184 for (k = Lstore->nsuper; k >= 0; k--) { 00185 fsupc = L_FST_SUPC(k); 00186 nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); 00187 nsupc = L_FST_SUPC(k+1) - fsupc; 00188 luptr = L_NZ_START(fsupc); 00189 00190 solve_ops += nsupc * (nsupc + 1); 00191 00192 if ( nsupc == 1 ) { 00193 x[fsupc] /= Lval[luptr]; 00194 for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) { 00195 irow = U_SUB(i); 00196 x[irow] -= x[fsupc] * Uval[i]; 00197 } 00198 } else { 00199 #ifdef USE_VENDOR_BLAS 00200 #ifdef _CRAY 00201 STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr, 00202 &x[fsupc], &incx); 00203 #else 00204 strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr, 00205 &x[fsupc], &incx); 00206 #endif 00207 #else 00208 susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] ); 00209 #endif 00210 00211 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { 00212 solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); 00213 for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 00214 i++) { 00215 irow = U_SUB(i); 00216 x[irow] -= x[jcol] * Uval[i]; 00217 } 00218 } 00219 } 00220 } /* for k ... */ 00221 00222 } 00223 } else { /* Form x := inv(A')*x */ 00224 00225 if ( lsame_(uplo, "L") ) { 00226 /* Form x := inv(L')*x */ 00227 if ( L->nrow == 0 ) return 0; /* Quick return */ 00228 00229 for (k = Lstore->nsuper; k >= 0; --k) { 00230 fsupc = L_FST_SUPC(k); 00231 istart = L_SUB_START(fsupc); 00232 nsupr = L_SUB_START(fsupc+1) - istart; 00233 nsupc = L_FST_SUPC(k+1) - fsupc; 00234 luptr = L_NZ_START(fsupc); 00235 00236 solve_ops += 2 * (nsupr - nsupc) * nsupc; 00237 00238 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { 00239 iptr = istart + nsupc; 00240 for (i = L_NZ_START(jcol) + nsupc; 00241 i < L_NZ_START(jcol+1); i++) { 00242 irow = L_SUB(iptr); 00243 x[jcol] -= x[irow] * Lval[i]; 00244 iptr++; 00245 } 00246 } 00247 00248 if ( nsupc > 1 ) { 00249 solve_ops += nsupc * (nsupc - 1); 00250 #ifdef _CRAY 00251 ftcs1 = _cptofcd("L", strlen("L")); 00252 ftcs2 = _cptofcd("T", strlen("T")); 00253 ftcs3 = _cptofcd("U", strlen("U")); 00254 STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, 00255 &x[fsupc], &incx); 00256 #else 00257 strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, 00258 &x[fsupc], &incx); 00259 #endif 00260 } 00261 } 00262 } else { 00263 /* Form x := inv(U')*x */ 00264 if ( U->nrow == 0 ) return 0; /* Quick return */ 00265 00266 for (k = 0; k <= Lstore->nsuper; k++) { 00267 fsupc = L_FST_SUPC(k); 00268 nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); 00269 nsupc = L_FST_SUPC(k+1) - fsupc; 00270 luptr = L_NZ_START(fsupc); 00271 00272 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { 00273 solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); 00274 for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { 00275 irow = U_SUB(i); 00276 x[jcol] -= x[irow] * Uval[i]; 00277 } 00278 } 00279 00280 solve_ops += nsupc * (nsupc + 1); 00281 00282 if ( nsupc == 1 ) { 00283 x[fsupc] /= Lval[luptr]; 00284 } else { 00285 #ifdef _CRAY 00286 ftcs1 = _cptofcd("U", strlen("U")); 00287 ftcs2 = _cptofcd("T", strlen("T")); 00288 ftcs3 = _cptofcd("N", strlen("N")); 00289 STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, 00290 &x[fsupc], &incx); 00291 #else 00292 strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, 00293 &x[fsupc], &incx); 00294 #endif 00295 } 00296 } /* for k ... */ 00297 } 00298 } 00299 00300 stat->ops[SOLVE] += solve_ops; 00301 SUPERLU_FREE(work); 00302 return 0; 00303 } 00304 00305 00306 00307 00308 int 00309 sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, 00310 int incx, float beta, float *y, int incy) 00311 { 00312 /* Purpose 00313 ======= 00314 00315 sp_sgemv() performs one of the matrix-vector operations 00316 y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, 00317 where alpha and beta are scalars, x and y are vectors and A is a 00318 sparse A->nrow by A->ncol matrix. 00319 00320 Parameters 00321 ========== 00322 00323 TRANS - (input) char* 00324 On entry, TRANS specifies the operation to be performed as 00325 follows: 00326 TRANS = 'N' or 'n' y := alpha*A*x + beta*y. 00327 TRANS = 'T' or 't' y := alpha*A'*x + beta*y. 00328 TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. 00329 00330 ALPHA - (input) float 00331 On entry, ALPHA specifies the scalar alpha. 00332 00333 A - (input) SuperMatrix* 00334 Matrix A with a sparse format, of dimension (A->nrow, A->ncol). 00335 Currently, the type of A can be: 00336 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 00337 In the future, more general A can be handled. 00338 00339 X - (input) float*, array of DIMENSION at least 00340 ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' 00341 and at least 00342 ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. 00343 Before entry, the incremented array X must contain the 00344 vector x. 00345 00346 INCX - (input) int 00347 On entry, INCX specifies the increment for the elements of 00348 X. INCX must not be zero. 00349 00350 BETA - (input) float 00351 On entry, BETA specifies the scalar beta. When BETA is 00352 supplied as zero then Y need not be set on input. 00353 00354 Y - (output) float*, array of DIMENSION at least 00355 ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' 00356 and at least 00357 ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. 00358 Before entry with BETA non-zero, the incremented array Y 00359 must contain the vector y. On exit, Y is overwritten by the 00360 updated vector y. 00361 00362 INCY - (input) int 00363 On entry, INCY specifies the increment for the elements of 00364 Y. INCY must not be zero. 00365 00366 ==== Sparse Level 2 Blas routine. 00367 */ 00368 00369 /* Local variables */ 00370 NCformat *Astore; 00371 float *Aval; 00372 int info; 00373 float temp; 00374 int lenx, leny, i, j, irow; 00375 int iy, jx, jy, kx, ky; 00376 int notran; 00377 00378 notran = lsame_(trans, "N"); 00379 Astore = A->Store; 00380 Aval = Astore->nzval; 00381 00382 /* Test the input parameters */ 00383 info = 0; 00384 if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1; 00385 else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; 00386 else if (incx == 0) info = 5; 00387 else if (incy == 0) info = 8; 00388 if (info != 0) { 00389 xerbla_("sp_sgemv ", &info); 00390 return 0; 00391 } 00392 00393 /* Quick return if possible. */ 00394 if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.)) 00395 return 0; 00396 00397 /* Set LENX and LENY, the lengths of the vectors x and y, and set 00398 up the start points in X and Y. */ 00399 if (lsame_(trans, "N")) { 00400 lenx = A->ncol; 00401 leny = A->nrow; 00402 } else { 00403 lenx = A->nrow; 00404 leny = A->ncol; 00405 } 00406 if (incx > 0) kx = 0; 00407 else kx = - (lenx - 1) * incx; 00408 if (incy > 0) ky = 0; 00409 else ky = - (leny - 1) * incy; 00410 00411 /* Start the operations. In this version the elements of A are 00412 accessed sequentially with one pass through A. */ 00413 /* First form y := beta*y. */ 00414 if (beta != 1.) { 00415 if (incy == 1) { 00416 if (beta == 0.) 00417 for (i = 0; i < leny; ++i) y[i] = 0.; 00418 else 00419 for (i = 0; i < leny; ++i) y[i] = beta * y[i]; 00420 } else { 00421 iy = ky; 00422 if (beta == 0.) 00423 for (i = 0; i < leny; ++i) { 00424 y[iy] = 0.; 00425 iy += incy; 00426 } 00427 else 00428 for (i = 0; i < leny; ++i) { 00429 y[iy] = beta * y[iy]; 00430 iy += incy; 00431 } 00432 } 00433 } 00434 00435 if (alpha == 0.) return 0; 00436 00437 if ( notran ) { 00438 /* Form y := alpha*A*x + y. */ 00439 jx = kx; 00440 if (incy == 1) { 00441 for (j = 0; j < A->ncol; ++j) { 00442 if (x[jx] != 0.) { 00443 temp = alpha * x[jx]; 00444 for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { 00445 irow = Astore->rowind[i]; 00446 y[irow] += temp * Aval[i]; 00447 } 00448 } 00449 jx += incx; 00450 } 00451 } else { 00452 ABORT("Not implemented."); 00453 } 00454 } else { 00455 /* Form y := alpha*A'*x + y. */ 00456 jy = ky; 00457 if (incx == 1) { 00458 for (j = 0; j < A->ncol; ++j) { 00459 temp = 0.; 00460 for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { 00461 irow = Astore->rowind[i]; 00462 temp += Aval[i] * x[irow]; 00463 } 00464 y[jy] += alpha * temp; 00465 jy += incy; 00466 } 00467 } else { 00468 ABORT("Not implemented."); 00469 } 00470 } 00471 return 0; 00472 } /* sp_sgemv */ 00473 00474 00475