Blender V2.61 - r43446

smyblas2.c

Go to the documentation of this file.
00001 
00006 /*
00007  * -- SuperLU routine (version 2.0) --
00008  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
00009  * and Lawrence Berkeley National Lab.
00010  * November 15, 1997
00011  *
00012  */
00013 /*
00014  * File name:       smyblas2.c
00015  * Purpose:
00016  *     Level 2 BLAS operations: solves and matvec, written in C.
00017  * Note:
00018  *     This is only used when the system lacks an efficient BLAS library.
00019  */
00020 
00021 /*
00022  * Solves a dense UNIT lower triangular system. The unit lower 
00023  * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
00024  * The solution will be returned in the rhs vector.
00025  */
00026 
00027 /* local prototypes*/
00028 void slsolve ( int, int, float *, float *);
00029 void susolve ( int, int, float *, float *);
00030 void smatvec ( int, int, int, float *, float *, float *);
00031 
00032 
00033 void slsolve ( int ldm, int ncol, float *M, float *rhs )
00034 {
00035     int k;
00036     float x0, x1, x2, x3, x4, x5, x6, x7;
00037     float *M0;
00038     register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
00039     register int firstcol = 0;
00040 
00041     M0 = &M[0];
00042 
00043     while ( firstcol < ncol - 7 ) { /* Do 8 columns */
00044       Mki0 = M0 + 1;
00045       Mki1 = Mki0 + ldm + 1;
00046       Mki2 = Mki1 + ldm + 1;
00047       Mki3 = Mki2 + ldm + 1;
00048       Mki4 = Mki3 + ldm + 1;
00049       Mki5 = Mki4 + ldm + 1;
00050       Mki6 = Mki5 + ldm + 1;
00051       Mki7 = Mki6 + ldm + 1;
00052 
00053       x0 = rhs[firstcol];
00054       x1 = rhs[firstcol+1] - x0 * *Mki0++;
00055       x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
00056       x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
00057       x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
00058                        - x3 * *Mki3++;
00059       x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
00060                        - x3 * *Mki3++ - x4 * *Mki4++;
00061       x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
00062                        - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
00063       x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
00064                        - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
00065                - x6 * *Mki6++;
00066 
00067       rhs[++firstcol] = x1;
00068       rhs[++firstcol] = x2;
00069       rhs[++firstcol] = x3;
00070       rhs[++firstcol] = x4;
00071       rhs[++firstcol] = x5;
00072       rhs[++firstcol] = x6;
00073       rhs[++firstcol] = x7;
00074       ++firstcol;
00075     
00076       for (k = firstcol; k < ncol; k++)
00077     rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
00078                     - x2 * *Mki2++ - x3 * *Mki3++
00079                         - x4 * *Mki4++ - x5 * *Mki5++
00080             - x6 * *Mki6++ - x7 * *Mki7++;
00081  
00082       M0 += 8 * ldm + 8;
00083     }
00084 
00085     while ( firstcol < ncol - 3 ) { /* Do 4 columns */
00086       Mki0 = M0 + 1;
00087       Mki1 = Mki0 + ldm + 1;
00088       Mki2 = Mki1 + ldm + 1;
00089       Mki3 = Mki2 + ldm + 1;
00090 
00091       x0 = rhs[firstcol];
00092       x1 = rhs[firstcol+1] - x0 * *Mki0++;
00093       x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
00094       x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
00095 
00096       rhs[++firstcol] = x1;
00097       rhs[++firstcol] = x2;
00098       rhs[++firstcol] = x3;
00099       ++firstcol;
00100     
00101       for (k = firstcol; k < ncol; k++)
00102     rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
00103                     - x2 * *Mki2++ - x3 * *Mki3++;
00104  
00105       M0 += 4 * ldm + 4;
00106     }
00107 
00108     if ( firstcol < ncol - 1 ) { /* Do 2 columns */
00109       Mki0 = M0 + 1;
00110       Mki1 = Mki0 + ldm + 1;
00111 
00112       x0 = rhs[firstcol];
00113       x1 = rhs[firstcol+1] - x0 * *Mki0++;
00114 
00115       rhs[++firstcol] = x1;
00116       ++firstcol;
00117     
00118       for (k = firstcol; k < ncol; k++)
00119     rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
00120  
00121     }
00122     
00123 }
00124 
00125 /*
00126  * Solves a dense upper triangular system. The upper triangular matrix is
00127  * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
00128  * in the rhs vector.
00129  */
00130 void
00131 susolve ( ldm, ncol, M, rhs )
00132 int ldm;    /* in */
00133 int ncol;   /* in */
00134 float *M;   /* in */
00135 float *rhs; /* modified */
00136 {
00137     float xj;
00138     int jcol, j, irow;
00139 
00140     jcol = ncol - 1;
00141 
00142     for (j = 0; j < ncol; j++) {
00143 
00144     xj = rhs[jcol] / M[jcol + jcol*ldm];        /* M(jcol, jcol) */
00145     rhs[jcol] = xj;
00146     
00147     for (irow = 0; irow < jcol; irow++)
00148         rhs[irow] -= xj * M[irow + jcol*ldm];   /* M(irow, jcol) */
00149 
00150     jcol--;
00151 
00152     }
00153 }
00154 
00155 
00156 /*
00157  * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
00158  * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
00159  */
00160 void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
00161 
00162 int ldm;    /* in -- leading dimension of M */
00163 int nrow;   /* in */ 
00164 int ncol;   /* in */
00165 float *M;   /* in */
00166 float *vec; /* in */
00167 float *Mxvec;   /* in/out */
00168 
00169 {
00170     float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
00171     float *M0;
00172     register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
00173     register int firstcol = 0;
00174     int k;
00175 
00176     M0 = &M[0];
00177     while ( firstcol < ncol - 7 ) { /* Do 8 columns */
00178 
00179     Mki0 = M0;
00180     Mki1 = Mki0 + ldm;
00181         Mki2 = Mki1 + ldm;
00182         Mki3 = Mki2 + ldm;
00183     Mki4 = Mki3 + ldm;
00184     Mki5 = Mki4 + ldm;
00185     Mki6 = Mki5 + ldm;
00186     Mki7 = Mki6 + ldm;
00187 
00188     vi0 = vec[firstcol++];
00189     vi1 = vec[firstcol++];
00190     vi2 = vec[firstcol++];
00191     vi3 = vec[firstcol++];  
00192     vi4 = vec[firstcol++];
00193     vi5 = vec[firstcol++];
00194     vi6 = vec[firstcol++];
00195     vi7 = vec[firstcol++];  
00196 
00197     for (k = 0; k < nrow; k++) 
00198         Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
00199               + vi2 * *Mki2++ + vi3 * *Mki3++ 
00200               + vi4 * *Mki4++ + vi5 * *Mki5++
00201               + vi6 * *Mki6++ + vi7 * *Mki7++;
00202 
00203     M0 += 8 * ldm;
00204     }
00205 
00206     while ( firstcol < ncol - 3 ) { /* Do 4 columns */
00207 
00208     Mki0 = M0;
00209     Mki1 = Mki0 + ldm;
00210     Mki2 = Mki1 + ldm;
00211     Mki3 = Mki2 + ldm;
00212 
00213     vi0 = vec[firstcol++];
00214     vi1 = vec[firstcol++];
00215     vi2 = vec[firstcol++];
00216     vi3 = vec[firstcol++];  
00217     for (k = 0; k < nrow; k++) 
00218         Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
00219               + vi2 * *Mki2++ + vi3 * *Mki3++ ;
00220 
00221     M0 += 4 * ldm;
00222     }
00223 
00224     while ( firstcol < ncol ) {     /* Do 1 column */
00225 
00226     Mki0 = M0;
00227     vi0 = vec[firstcol++];
00228     for (k = 0; k < nrow; k++)
00229         Mxvec[k] += vi0 * *Mki0++;
00230 
00231     M0 += ldm;
00232     }
00233     
00234 }
00235