Blender V2.61 - r43446
|
00001 00005 /* 00006 * -- SuperLU routine (version 3.0) -- 00007 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00008 * and Lawrence Berkeley National Lab. 00009 * October 15, 2003 00010 * 00011 */ 00012 /* 00013 Copyright (c) 1994 by Xerox Corporation. All rights reserved. 00014 00015 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY 00016 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 00017 00018 Permission is hereby granted to use or copy this program for any 00019 purpose, provided the above notices are retained on all copies. 00020 Permission to modify the code and to distribute modified code is 00021 granted, provided the above notices are retained, and a notice that 00022 the code was modified is included with the above copyright notice. 00023 */ 00024 00025 #include <stdio.h> 00026 #include <stdlib.h> 00027 #include "ssp_defs.h" 00028 00029 /* 00030 * Function prototypes 00031 */ 00032 void susolve(int, int, float*, float*); 00033 void slsolve(int, int, float*, float*); 00034 void smatvec(int, int, int, float*, float*, float*); 00035 00036 00037 00038 /* Return value: 0 - successful return 00039 * > 0 - number of bytes allocated when run out of space 00040 */ 00041 int 00042 scolumn_bmod ( 00043 const int jcol, /* in */ 00044 const int nseg, /* in */ 00045 float *dense, /* in */ 00046 float *tempv, /* working array */ 00047 int *segrep, /* in */ 00048 int *repfnz, /* in */ 00049 int fpanelc, /* in -- first column in the current panel */ 00050 GlobalLU_t *Glu, /* modified */ 00051 SuperLUStat_t *stat /* output */ 00052 ) 00053 { 00054 /* 00055 * Purpose: 00056 * ======== 00057 * Performs numeric block updates (sup-col) in topological order. 00058 * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. 00059 * Special processing on the supernodal portion of L\U[*,j] 00060 * 00061 */ 00062 #ifdef _CRAY 00063 _fcd ftcs1 = _cptofcd("L", strlen("L")), 00064 ftcs2 = _cptofcd("N", strlen("N")), 00065 ftcs3 = _cptofcd("U", strlen("U")); 00066 #endif 00067 00068 #ifdef USE_VENDOR_BLAS 00069 int incx = 1, incy = 1; 00070 float alpha, beta; 00071 #endif 00072 00073 /* krep = representative of current k-th supernode 00074 * fsupc = first supernodal column 00075 * nsupc = no of columns in supernode 00076 * nsupr = no of rows in supernode (used as leading dimension) 00077 * luptr = location of supernodal LU-block in storage 00078 * kfnz = first nonz in the k-th supernodal segment 00079 * no_zeros = no of leading zeros in a supernodal U-segment 00080 */ 00081 float ukj, ukj1, ukj2; 00082 int luptr, luptr1, luptr2; 00083 int fsupc, nsupc, nsupr, segsze; 00084 int nrow; /* No of rows in the matrix of matrix-vector */ 00085 int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno; 00086 register int lptr, kfnz, isub, irow, i; 00087 register int no_zeros, new_next; 00088 int ufirst, nextlu; 00089 int fst_col; /* First column within small LU update */ 00090 int d_fsupc; /* Distance between the first column of the current 00091 panel and the first column of the current snode. */ 00092 int *xsup, *supno; 00093 int *lsub, *xlsub; 00094 float *lusup; 00095 int *xlusup; 00096 int nzlumax; 00097 float *tempv1; 00098 float zero = 0.0; 00099 #ifdef USE_VENDOR_BLAS 00100 float one = 1.0; 00101 float none = -1.0; 00102 #endif 00103 int mem_error; 00104 flops_t *ops = stat->ops; 00105 00106 xsup = Glu->xsup; 00107 supno = Glu->supno; 00108 lsub = Glu->lsub; 00109 xlsub = Glu->xlsub; 00110 lusup = Glu->lusup; 00111 xlusup = Glu->xlusup; 00112 nzlumax = Glu->nzlumax; 00113 jcolp1 = jcol + 1; 00114 jsupno = supno[jcol]; 00115 00116 /* 00117 * For each nonz supernode segment of U[*,j] in topological order 00118 */ 00119 k = nseg - 1; 00120 for (ksub = 0; ksub < nseg; ksub++) { 00121 00122 krep = segrep[k]; 00123 k--; 00124 ksupno = supno[krep]; 00125 if ( jsupno != ksupno ) { /* Outside the rectangular supernode */ 00126 00127 fsupc = xsup[ksupno]; 00128 fst_col = SUPERLU_MAX ( fsupc, fpanelc ); 00129 00130 /* Distance from the current supernode to the current panel; 00131 d_fsupc=0 if fsupc > fpanelc. */ 00132 d_fsupc = fst_col - fsupc; 00133 00134 luptr = xlusup[fst_col] + d_fsupc; 00135 lptr = xlsub[fsupc] + d_fsupc; 00136 00137 kfnz = repfnz[krep]; 00138 kfnz = SUPERLU_MAX ( kfnz, fpanelc ); 00139 00140 segsze = krep - kfnz + 1; 00141 nsupc = krep - fst_col + 1; 00142 nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */ 00143 nrow = nsupr - d_fsupc - nsupc; 00144 krep_ind = lptr + nsupc - 1; 00145 00146 ops[TRSV] += segsze * (segsze - 1); 00147 ops[GEMV] += 2 * nrow * segsze; 00148 00149 00150 /* 00151 * Case 1: Update U-segment of size 1 -- col-col update 00152 */ 00153 if ( segsze == 1 ) { 00154 ukj = dense[lsub[krep_ind]]; 00155 luptr += nsupr*(nsupc-1) + nsupc; 00156 00157 for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { 00158 irow = lsub[i]; 00159 dense[irow] -= ukj*lusup[luptr]; 00160 luptr++; 00161 } 00162 00163 } else if ( segsze <= 3 ) { 00164 ukj = dense[lsub[krep_ind]]; 00165 luptr += nsupr*(nsupc-1) + nsupc-1; 00166 ukj1 = dense[lsub[krep_ind - 1]]; 00167 luptr1 = luptr - nsupr; 00168 00169 if ( segsze == 2 ) { /* Case 2: 2cols-col update */ 00170 ukj -= ukj1 * lusup[luptr1]; 00171 dense[lsub[krep_ind]] = ukj; 00172 for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { 00173 irow = lsub[i]; 00174 luptr++; 00175 luptr1++; 00176 dense[irow] -= ( ukj*lusup[luptr] 00177 + ukj1*lusup[luptr1] ); 00178 } 00179 } else { /* Case 3: 3cols-col update */ 00180 ukj2 = dense[lsub[krep_ind - 2]]; 00181 luptr2 = luptr1 - nsupr; 00182 ukj1 -= ukj2 * lusup[luptr2-1]; 00183 ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2]; 00184 dense[lsub[krep_ind]] = ukj; 00185 dense[lsub[krep_ind-1]] = ukj1; 00186 for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { 00187 irow = lsub[i]; 00188 luptr++; 00189 luptr1++; 00190 luptr2++; 00191 dense[irow] -= ( ukj*lusup[luptr] 00192 + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] ); 00193 } 00194 } 00195 00196 00197 00198 } else { 00199 /* 00200 * Case: sup-col update 00201 * Perform a triangular solve and block update, 00202 * then scatter the result of sup-col update to dense 00203 */ 00204 00205 no_zeros = kfnz - fst_col; 00206 00207 /* Copy U[*,j] segment from dense[*] to tempv[*] */ 00208 isub = lptr + no_zeros; 00209 for (i = 0; i < segsze; i++) { 00210 irow = lsub[isub]; 00211 tempv[i] = dense[irow]; 00212 ++isub; 00213 } 00214 00215 /* Dense triangular solve -- start effective triangle */ 00216 luptr += nsupr * no_zeros + no_zeros; 00217 00218 #ifdef USE_VENDOR_BLAS 00219 #ifdef _CRAY 00220 STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 00221 &nsupr, tempv, &incx ); 00222 #else 00223 strsv_( "L", "N", "U", &segsze, &lusup[luptr], 00224 &nsupr, tempv, &incx ); 00225 #endif 00226 luptr += segsze; /* Dense matrix-vector */ 00227 tempv1 = &tempv[segsze]; 00228 alpha = one; 00229 beta = zero; 00230 #ifdef _CRAY 00231 SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 00232 &nsupr, tempv, &incx, &beta, tempv1, &incy ); 00233 #else 00234 sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 00235 &nsupr, tempv, &incx, &beta, tempv1, &incy ); 00236 #endif 00237 #else 00238 slsolve ( nsupr, segsze, &lusup[luptr], tempv ); 00239 00240 luptr += segsze; /* Dense matrix-vector */ 00241 tempv1 = &tempv[segsze]; 00242 smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1); 00243 #endif 00244 00245 00246 /* Scatter tempv[] into SPA dense[] as a temporary storage */ 00247 isub = lptr + no_zeros; 00248 for (i = 0; i < segsze; i++) { 00249 irow = lsub[isub]; 00250 dense[irow] = tempv[i]; 00251 tempv[i] = zero; 00252 ++isub; 00253 } 00254 00255 /* Scatter tempv1[] into SPA dense[] */ 00256 for (i = 0; i < nrow; i++) { 00257 irow = lsub[isub]; 00258 dense[irow] -= tempv1[i]; 00259 tempv1[i] = zero; 00260 ++isub; 00261 } 00262 } 00263 00264 } /* if jsupno ... */ 00265 00266 } /* for each segment... */ 00267 00268 /* 00269 * Process the supernodal portion of L\U[*,j] 00270 */ 00271 nextlu = xlusup[jcol]; 00272 fsupc = xsup[jsupno]; 00273 00274 /* Copy the SPA dense into L\U[*,j] */ 00275 new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc]; 00276 while ( new_next > nzlumax ) { 00277 if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))) 00278 return (mem_error); 00279 lusup = Glu->lusup; 00280 lsub = Glu->lsub; 00281 } 00282 00283 for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) { 00284 irow = lsub[isub]; 00285 lusup[nextlu] = dense[irow]; 00286 dense[irow] = zero; 00287 ++nextlu; 00288 } 00289 00290 xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */ 00291 00292 /* For more updates within the panel (also within the current supernode), 00293 * should start from the first column of the panel, or the first column 00294 * of the supernode, whichever is bigger. There are 2 cases: 00295 * 1) fsupc < fpanelc, then fst_col := fpanelc 00296 * 2) fsupc >= fpanelc, then fst_col := fsupc 00297 */ 00298 fst_col = SUPERLU_MAX ( fsupc, fpanelc ); 00299 00300 if ( fst_col < jcol ) { 00301 00302 /* Distance between the current supernode and the current panel. 00303 d_fsupc=0 if fsupc >= fpanelc. */ 00304 d_fsupc = fst_col - fsupc; 00305 00306 luptr = xlusup[fst_col] + d_fsupc; 00307 nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */ 00308 nsupc = jcol - fst_col; /* Excluding jcol */ 00309 nrow = nsupr - d_fsupc - nsupc; 00310 00311 /* Points to the beginning of jcol in snode L\U(jsupno) */ 00312 ufirst = xlusup[jcol] + d_fsupc; 00313 00314 ops[TRSV] += nsupc * (nsupc - 1); 00315 ops[GEMV] += 2 * nrow * nsupc; 00316 00317 #ifdef USE_VENDOR_BLAS 00318 #ifdef _CRAY 00319 STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 00320 &nsupr, &lusup[ufirst], &incx ); 00321 #else 00322 strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 00323 &nsupr, &lusup[ufirst], &incx ); 00324 #endif 00325 00326 alpha = none; beta = one; /* y := beta*y + alpha*A*x */ 00327 00328 #ifdef _CRAY 00329 SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 00330 &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); 00331 #else 00332 sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 00333 &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); 00334 #endif 00335 #else 00336 slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] ); 00337 00338 smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 00339 &lusup[ufirst], tempv ); 00340 00341 /* Copy updates from tempv[*] into lusup[*] */ 00342 isub = ufirst + nsupc; 00343 for (i = 0; i < nrow; i++) { 00344 lusup[isub] -= tempv[i]; 00345 tempv[i] = 0.0; 00346 ++isub; 00347 } 00348 00349 #endif 00350 00351 00352 } /* if fst_col < jcol ... */ 00353 00354 return 0; 00355 }