Blender V2.61 - r43446
|
00001 00006 /* 00007 * -- SuperLU routine (version 2.0) -- 00008 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00009 * and Lawrence Berkeley National Lab. 00010 * November 15, 1997 00011 * 00012 */ 00013 /* 00014 * File name: smyblas2.c 00015 * Purpose: 00016 * Level 2 BLAS operations: solves and matvec, written in C. 00017 * Note: 00018 * This is only used when the system lacks an efficient BLAS library. 00019 */ 00020 00021 /* 00022 * Solves a dense UNIT lower triangular system. The unit lower 00023 * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 00024 * The solution will be returned in the rhs vector. 00025 */ 00026 00027 /* local prototypes*/ 00028 void slsolve ( int, int, float *, float *); 00029 void susolve ( int, int, float *, float *); 00030 void smatvec ( int, int, int, float *, float *, float *); 00031 00032 00033 void slsolve ( int ldm, int ncol, float *M, float *rhs ) 00034 { 00035 int k; 00036 float x0, x1, x2, x3, x4, x5, x6, x7; 00037 float *M0; 00038 register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; 00039 register int firstcol = 0; 00040 00041 M0 = &M[0]; 00042 00043 while ( firstcol < ncol - 7 ) { /* Do 8 columns */ 00044 Mki0 = M0 + 1; 00045 Mki1 = Mki0 + ldm + 1; 00046 Mki2 = Mki1 + ldm + 1; 00047 Mki3 = Mki2 + ldm + 1; 00048 Mki4 = Mki3 + ldm + 1; 00049 Mki5 = Mki4 + ldm + 1; 00050 Mki6 = Mki5 + ldm + 1; 00051 Mki7 = Mki6 + ldm + 1; 00052 00053 x0 = rhs[firstcol]; 00054 x1 = rhs[firstcol+1] - x0 * *Mki0++; 00055 x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++; 00056 x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++; 00057 x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ 00058 - x3 * *Mki3++; 00059 x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ 00060 - x3 * *Mki3++ - x4 * *Mki4++; 00061 x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ 00062 - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++; 00063 x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ 00064 - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++ 00065 - x6 * *Mki6++; 00066 00067 rhs[++firstcol] = x1; 00068 rhs[++firstcol] = x2; 00069 rhs[++firstcol] = x3; 00070 rhs[++firstcol] = x4; 00071 rhs[++firstcol] = x5; 00072 rhs[++firstcol] = x6; 00073 rhs[++firstcol] = x7; 00074 ++firstcol; 00075 00076 for (k = firstcol; k < ncol; k++) 00077 rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++ 00078 - x2 * *Mki2++ - x3 * *Mki3++ 00079 - x4 * *Mki4++ - x5 * *Mki5++ 00080 - x6 * *Mki6++ - x7 * *Mki7++; 00081 00082 M0 += 8 * ldm + 8; 00083 } 00084 00085 while ( firstcol < ncol - 3 ) { /* Do 4 columns */ 00086 Mki0 = M0 + 1; 00087 Mki1 = Mki0 + ldm + 1; 00088 Mki2 = Mki1 + ldm + 1; 00089 Mki3 = Mki2 + ldm + 1; 00090 00091 x0 = rhs[firstcol]; 00092 x1 = rhs[firstcol+1] - x0 * *Mki0++; 00093 x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++; 00094 x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++; 00095 00096 rhs[++firstcol] = x1; 00097 rhs[++firstcol] = x2; 00098 rhs[++firstcol] = x3; 00099 ++firstcol; 00100 00101 for (k = firstcol; k < ncol; k++) 00102 rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++ 00103 - x2 * *Mki2++ - x3 * *Mki3++; 00104 00105 M0 += 4 * ldm + 4; 00106 } 00107 00108 if ( firstcol < ncol - 1 ) { /* Do 2 columns */ 00109 Mki0 = M0 + 1; 00110 Mki1 = Mki0 + ldm + 1; 00111 00112 x0 = rhs[firstcol]; 00113 x1 = rhs[firstcol+1] - x0 * *Mki0++; 00114 00115 rhs[++firstcol] = x1; 00116 ++firstcol; 00117 00118 for (k = firstcol; k < ncol; k++) 00119 rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++; 00120 00121 } 00122 00123 } 00124 00125 /* 00126 * Solves a dense upper triangular system. The upper triangular matrix is 00127 * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned 00128 * in the rhs vector. 00129 */ 00130 void 00131 susolve ( ldm, ncol, M, rhs ) 00132 int ldm; /* in */ 00133 int ncol; /* in */ 00134 float *M; /* in */ 00135 float *rhs; /* modified */ 00136 { 00137 float xj; 00138 int jcol, j, irow; 00139 00140 jcol = ncol - 1; 00141 00142 for (j = 0; j < ncol; j++) { 00143 00144 xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */ 00145 rhs[jcol] = xj; 00146 00147 for (irow = 0; irow < jcol; irow++) 00148 rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */ 00149 00150 jcol--; 00151 00152 } 00153 } 00154 00155 00156 /* 00157 * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec. 00158 * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[]. 00159 */ 00160 void smatvec ( ldm, nrow, ncol, M, vec, Mxvec ) 00161 00162 int ldm; /* in -- leading dimension of M */ 00163 int nrow; /* in */ 00164 int ncol; /* in */ 00165 float *M; /* in */ 00166 float *vec; /* in */ 00167 float *Mxvec; /* in/out */ 00168 00169 { 00170 float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7; 00171 float *M0; 00172 register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; 00173 register int firstcol = 0; 00174 int k; 00175 00176 M0 = &M[0]; 00177 while ( firstcol < ncol - 7 ) { /* Do 8 columns */ 00178 00179 Mki0 = M0; 00180 Mki1 = Mki0 + ldm; 00181 Mki2 = Mki1 + ldm; 00182 Mki3 = Mki2 + ldm; 00183 Mki4 = Mki3 + ldm; 00184 Mki5 = Mki4 + ldm; 00185 Mki6 = Mki5 + ldm; 00186 Mki7 = Mki6 + ldm; 00187 00188 vi0 = vec[firstcol++]; 00189 vi1 = vec[firstcol++]; 00190 vi2 = vec[firstcol++]; 00191 vi3 = vec[firstcol++]; 00192 vi4 = vec[firstcol++]; 00193 vi5 = vec[firstcol++]; 00194 vi6 = vec[firstcol++]; 00195 vi7 = vec[firstcol++]; 00196 00197 for (k = 0; k < nrow; k++) 00198 Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ 00199 + vi2 * *Mki2++ + vi3 * *Mki3++ 00200 + vi4 * *Mki4++ + vi5 * *Mki5++ 00201 + vi6 * *Mki6++ + vi7 * *Mki7++; 00202 00203 M0 += 8 * ldm; 00204 } 00205 00206 while ( firstcol < ncol - 3 ) { /* Do 4 columns */ 00207 00208 Mki0 = M0; 00209 Mki1 = Mki0 + ldm; 00210 Mki2 = Mki1 + ldm; 00211 Mki3 = Mki2 + ldm; 00212 00213 vi0 = vec[firstcol++]; 00214 vi1 = vec[firstcol++]; 00215 vi2 = vec[firstcol++]; 00216 vi3 = vec[firstcol++]; 00217 for (k = 0; k < nrow; k++) 00218 Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ 00219 + vi2 * *Mki2++ + vi3 * *Mki3++ ; 00220 00221 M0 += 4 * ldm; 00222 } 00223 00224 while ( firstcol < ncol ) { /* Do 1 column */ 00225 00226 Mki0 = M0; 00227 vi0 = vec[firstcol++]; 00228 for (k = 0; k < nrow; k++) 00229 Mxvec[k] += vi0 * *Mki0++; 00230 00231 M0 += ldm; 00232 } 00233 00234 } 00235