Blender V2.61 - r43446

spanel_bmod.c

Go to the documentation of this file.
00001 
00005 /*
00006  * -- SuperLU routine (version 3.0) --
00007  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
00008  * and Lawrence Berkeley National Lab.
00009  * October 15, 2003
00010  *
00011  */
00012 /*
00013   Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
00014  
00015   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
00016   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00017  
00018   Permission is hereby granted to use or copy this program for any
00019   purpose, provided the above notices are retained on all copies.
00020   Permission to modify the code and to distribute modified code is
00021   granted, provided the above notices are retained, and a notice that
00022   the code was modified is included with the above copyright notice.
00023 */
00024 
00025 #include <stdio.h>
00026 #include <stdlib.h>
00027 #include "ssp_defs.h"
00028 
00029 /* 
00030  * Function prototypes 
00031  */
00032 void slsolve(int, int, float *, float *);
00033 void smatvec(int, int, int, float *, float *, float *);
00034 extern void scheck_tempv();
00035 
00036 void
00037 spanel_bmod (
00038         const int  m,          /* in - number of rows in the matrix */
00039         const int  w,          /* in */
00040         const int  jcol,       /* in */
00041         const int  nseg,       /* in */
00042         float     *dense,     /* out, of size n by w */
00043         float     *tempv,     /* working array */
00044         int        *segrep,    /* in */
00045         int        *repfnz,    /* in, of size n by w */
00046         GlobalLU_t *Glu,       /* modified */
00047         SuperLUStat_t *stat    /* output */
00048         )
00049 {
00050 /* 
00051  * Purpose
00052  * =======
00053  *
00054  *    Performs numeric block updates (sup-panel) in topological order.
00055  *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
00056  *    Special processing on the supernodal portion of L\U[*,j]
00057  *
00058  *    Before entering this routine, the original nonzeros in the panel 
00059  *    were already copied into the spa[m,w].
00060  *
00061  *    Updated/Output parameters-
00062  *  dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
00063  *      collectively in the m-by-w vector dense[*]. 
00064  *
00065  */
00066 
00067 #ifdef USE_VENDOR_BLAS
00068 #ifdef _CRAY
00069     _fcd ftcs1 = _cptofcd("L", strlen("L")),
00070          ftcs2 = _cptofcd("N", strlen("N")),
00071          ftcs3 = _cptofcd("U", strlen("U"));
00072 #endif
00073     int          incx = 1, incy = 1;
00074     float       alpha, beta;
00075 #endif
00076 
00077     register int k, ksub;
00078     int          fsupc, nsupc, nsupr, nrow;
00079     int          krep, krep_ind;
00080     float       ukj, ukj1, ukj2;
00081     int          luptr, luptr1, luptr2;
00082     int          segsze;
00083     int          block_nrow;  /* no of rows in a block row */
00084     register int lptr;        /* Points to the row subscripts of a supernode */
00085     int          kfnz, irow, no_zeros; 
00086     register int isub, isub1, i;
00087     register int jj;          /* Index through each column in the panel */
00088     int          *xsup, *supno;
00089     int          *lsub, *xlsub;
00090     float       *lusup;
00091     int          *xlusup;
00092     int          *repfnz_col; /* repfnz[] for a column in the panel */
00093     float       *dense_col;  /* dense[] for a column in the panel */
00094     float       *tempv1;             /* Used in 1-D update */
00095     float       *TriTmp, *MatvecTmp; /* used in 2-D update */
00096     float      zero = 0.0;
00097     register int ldaTmp;
00098     register int r_ind, r_hi;
00099     static   int first = 1, maxsuper, rowblk, colblk;
00100     flops_t  *ops = stat->ops;
00101     
00102     xsup    = Glu->xsup;
00103     supno   = Glu->supno;
00104     lsub    = Glu->lsub;
00105     xlsub   = Glu->xlsub;
00106     lusup   = Glu->lusup;
00107     xlusup  = Glu->xlusup;
00108     
00109     if ( first ) {
00110     maxsuper = sp_ienv(3);
00111     rowblk   = sp_ienv(4);
00112     colblk   = sp_ienv(5);
00113     first = 0;
00114     }
00115     ldaTmp = maxsuper + rowblk;
00116 
00117     /* 
00118      * For each nonz supernode segment of U[*,j] in topological order 
00119      */
00120     k = nseg - 1;
00121     for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
00122 
00123     /* krep = representative of current k-th supernode
00124      * fsupc = first supernodal column
00125      * nsupc = no of columns in a supernode
00126      * nsupr = no of rows in a supernode
00127      */
00128         krep = segrep[k--];
00129     fsupc = xsup[supno[krep]];
00130     nsupc = krep - fsupc + 1;
00131     nsupr = xlsub[fsupc+1] - xlsub[fsupc];
00132     nrow = nsupr - nsupc;
00133     lptr = xlsub[fsupc];
00134     krep_ind = lptr + nsupc - 1;
00135 
00136     repfnz_col = repfnz;
00137     dense_col = dense;
00138     
00139     if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
00140 
00141         TriTmp = tempv;
00142     
00143         /* Sequence through each column in panel -- triangular solves */
00144         for (jj = jcol; jj < jcol + w; jj++,
00145          repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
00146 
00147         kfnz = repfnz_col[krep];
00148         if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
00149         
00150         segsze = krep - kfnz + 1;
00151         luptr = xlusup[fsupc];
00152 
00153         ops[TRSV] += segsze * (segsze - 1);
00154         ops[GEMV] += 2 * nrow * segsze;
00155     
00156         /* Case 1: Update U-segment of size 1 -- col-col update */
00157         if ( segsze == 1 ) {
00158             ukj = dense_col[lsub[krep_ind]];
00159             luptr += nsupr*(nsupc-1) + nsupc;
00160 
00161             for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
00162             irow = lsub[i];
00163             dense_col[irow] -= ukj * lusup[luptr];
00164             ++luptr;
00165             }
00166 
00167         } else if ( segsze <= 3 ) {
00168             ukj = dense_col[lsub[krep_ind]];
00169             ukj1 = dense_col[lsub[krep_ind - 1]];
00170             luptr += nsupr*(nsupc-1) + nsupc-1;
00171             luptr1 = luptr - nsupr;
00172 
00173             if ( segsze == 2 ) {
00174             ukj -= ukj1 * lusup[luptr1];
00175             dense_col[lsub[krep_ind]] = ukj;
00176             for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
00177                 irow = lsub[i];
00178                 luptr++; luptr1++;
00179                 dense_col[irow] -= (ukj*lusup[luptr]
00180                         + ukj1*lusup[luptr1]);
00181             }
00182             } else {
00183             ukj2 = dense_col[lsub[krep_ind - 2]];
00184             luptr2 = luptr1 - nsupr;
00185             ukj1 -= ukj2 * lusup[luptr2-1];
00186             ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
00187             dense_col[lsub[krep_ind]] = ukj;
00188             dense_col[lsub[krep_ind-1]] = ukj1;
00189             for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
00190                 irow = lsub[i];
00191                 luptr++; luptr1++; luptr2++;
00192                 dense_col[irow] -= ( ukj*lusup[luptr]
00193                              + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
00194             }
00195             }
00196 
00197         } else  {   /* segsze >= 4 */
00198             
00199             /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
00200                holds the result of triangular solves.    */
00201             no_zeros = kfnz - fsupc;
00202             isub = lptr + no_zeros;
00203             for (i = 0; i < segsze; ++i) {
00204             irow = lsub[isub];
00205             TriTmp[i] = dense_col[irow]; /* Gather */
00206             ++isub;
00207             }
00208             
00209             /* start effective triangle */
00210             luptr += nsupr * no_zeros + no_zeros;
00211 
00212 #ifdef USE_VENDOR_BLAS
00213 #ifdef _CRAY
00214             STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
00215                &nsupr, TriTmp, &incx );
00216 #else
00217             strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
00218                &nsupr, TriTmp, &incx );
00219 #endif
00220 #else       
00221             slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
00222 #endif
00223             
00224 
00225         } /* else ... */
00226         
00227         }  /* for jj ... end tri-solves */
00228 
00229         /* Block row updates; push all the way into dense[*] block */
00230         for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
00231         
00232         r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
00233         block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
00234         luptr = xlusup[fsupc] + nsupc + r_ind;
00235         isub1 = lptr + nsupc + r_ind;
00236         
00237         repfnz_col = repfnz;
00238         TriTmp = tempv;
00239         dense_col = dense;
00240         
00241         /* Sequence through each column in panel -- matrix-vector */
00242         for (jj = jcol; jj < jcol + w; jj++,
00243              repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
00244             
00245             kfnz = repfnz_col[krep];
00246             if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
00247             
00248             segsze = krep - kfnz + 1;
00249             if ( segsze <= 3 ) continue;   /* skip unrolled cases */
00250             
00251             /* Perform a block update, and scatter the result of
00252                matrix-vector to dense[].         */
00253             no_zeros = kfnz - fsupc;
00254             luptr1 = luptr + nsupr * no_zeros;
00255             MatvecTmp = &TriTmp[maxsuper];
00256             
00257 #ifdef USE_VENDOR_BLAS
00258             alpha = one; 
00259                     beta = zero;
00260 #ifdef _CRAY
00261             SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
00262                &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
00263 #else
00264             sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
00265                &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
00266 #endif
00267 #else
00268             smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
00269                TriTmp, MatvecTmp);
00270 #endif
00271             
00272             /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
00273              * such that MatvecTmp[*] can be re-used for the
00274              * the next blok row update. dense[] will be copied into 
00275              * global store after the whole panel has been finished.
00276              */
00277             isub = isub1;
00278             for (i = 0; i < block_nrow; i++) {
00279             irow = lsub[isub];
00280             dense_col[irow] -= MatvecTmp[i];
00281             MatvecTmp[i] = zero;
00282             ++isub;
00283             }
00284             
00285         } /* for jj ... */
00286         
00287         } /* for each block row ... */
00288         
00289         /* Scatter the triangular solves into SPA dense[*] */
00290         repfnz_col = repfnz;
00291         TriTmp = tempv;
00292         dense_col = dense;
00293         
00294         for (jj = jcol; jj < jcol + w; jj++,
00295          repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
00296         kfnz = repfnz_col[krep];
00297         if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
00298         
00299         segsze = krep - kfnz + 1;
00300         if ( segsze <= 3 ) continue; /* skip unrolled cases */
00301         
00302         no_zeros = kfnz - fsupc;        
00303         isub = lptr + no_zeros;
00304         for (i = 0; i < segsze; i++) {
00305             irow = lsub[isub];
00306             dense_col[irow] = TriTmp[i];
00307             TriTmp[i] = zero;
00308             ++isub;
00309         }
00310         
00311         } /* for jj ... */
00312         
00313     } else { /* 1-D block modification */
00314         
00315         
00316         /* Sequence through each column in the panel */
00317         for (jj = jcol; jj < jcol + w; jj++,
00318          repfnz_col += m, dense_col += m) {
00319         
00320         kfnz = repfnz_col[krep];
00321         if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
00322         
00323         segsze = krep - kfnz + 1;
00324         luptr = xlusup[fsupc];
00325 
00326         ops[TRSV] += segsze * (segsze - 1);
00327         ops[GEMV] += 2 * nrow * segsze;
00328         
00329         /* Case 1: Update U-segment of size 1 -- col-col update */
00330         if ( segsze == 1 ) {
00331             ukj = dense_col[lsub[krep_ind]];
00332             luptr += nsupr*(nsupc-1) + nsupc;
00333 
00334             for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
00335             irow = lsub[i];
00336             dense_col[irow] -= ukj * lusup[luptr];
00337             ++luptr;
00338             }
00339 
00340         } else if ( segsze <= 3 ) {
00341             ukj = dense_col[lsub[krep_ind]];
00342             luptr += nsupr*(nsupc-1) + nsupc-1;
00343             ukj1 = dense_col[lsub[krep_ind - 1]];
00344             luptr1 = luptr - nsupr;
00345 
00346             if ( segsze == 2 ) {
00347             ukj -= ukj1 * lusup[luptr1];
00348             dense_col[lsub[krep_ind]] = ukj;
00349             for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
00350                 irow = lsub[i];
00351                 ++luptr;  ++luptr1;
00352                 dense_col[irow] -= (ukj*lusup[luptr]
00353                         + ukj1*lusup[luptr1]);
00354             }
00355             } else {
00356             ukj2 = dense_col[lsub[krep_ind - 2]];
00357             luptr2 = luptr1 - nsupr;
00358             ukj1 -= ukj2 * lusup[luptr2-1];
00359             ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
00360             dense_col[lsub[krep_ind]] = ukj;
00361             dense_col[lsub[krep_ind-1]] = ukj1;
00362             for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
00363                 irow = lsub[i];
00364                 ++luptr; ++luptr1; ++luptr2;
00365                 dense_col[irow] -= ( ukj*lusup[luptr]
00366                              + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
00367             }
00368             }
00369 
00370         } else  { /* segsze >= 4 */
00371             /* 
00372              * Perform a triangular solve and block update,
00373              * then scatter the result of sup-col update to dense[].
00374              */
00375             no_zeros = kfnz - fsupc;
00376             
00377             /* Copy U[*,j] segment from dense[*] to tempv[*]: 
00378              *    The result of triangular solve is in tempv[*];
00379              *    The result of matrix vector update is in dense_col[*]
00380              */
00381             isub = lptr + no_zeros;
00382             for (i = 0; i < segsze; ++i) {
00383             irow = lsub[isub];
00384             tempv[i] = dense_col[irow]; /* Gather */
00385             ++isub;
00386             }
00387             
00388             /* start effective triangle */
00389             luptr += nsupr * no_zeros + no_zeros;
00390             
00391 #ifdef USE_VENDOR_BLAS
00392 #ifdef _CRAY
00393             STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
00394                &nsupr, tempv, &incx );
00395 #else
00396             strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
00397                &nsupr, tempv, &incx );
00398 #endif
00399             
00400             luptr += segsze;    /* Dense matrix-vector */
00401             tempv1 = &tempv[segsze];
00402                     alpha = one;
00403                     beta = zero;
00404 #ifdef _CRAY
00405             SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
00406                &nsupr, tempv, &incx, &beta, tempv1, &incy );
00407 #else
00408             sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
00409                &nsupr, tempv, &incx, &beta, tempv1, &incy );
00410 #endif
00411 #else
00412             slsolve ( nsupr, segsze, &lusup[luptr], tempv );
00413             
00414             luptr += segsze;        /* Dense matrix-vector */
00415             tempv1 = &tempv[segsze];
00416             smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
00417 #endif
00418             
00419             /* Scatter tempv[*] into SPA dense[*] temporarily, such
00420              * that tempv[*] can be used for the triangular solve of
00421              * the next column of the panel. They will be copied into 
00422              * ucol[*] after the whole panel has been finished.
00423              */
00424             isub = lptr + no_zeros;
00425             for (i = 0; i < segsze; i++) {
00426             irow = lsub[isub];
00427             dense_col[irow] = tempv[i];
00428             tempv[i] = zero;
00429             isub++;
00430             }
00431             
00432             /* Scatter the update from tempv1[*] into SPA dense[*] */
00433             /* Start dense rectangular L */
00434             for (i = 0; i < nrow; i++) {
00435             irow = lsub[isub];
00436             dense_col[irow] -= tempv1[i];
00437             tempv1[i] = zero;
00438             ++isub; 
00439             }
00440             
00441         } /* else segsze>=4 ... */
00442         
00443         } /* for each column in the panel... */
00444         
00445     } /* else 1-D update ... */
00446 
00447     } /* for each updating supernode ... */
00448 
00449 }
00450 
00451 
00452