Blender V2.61 - r43446
|
00001 00005 /* 00006 * -- SuperLU routine (version 3.0) -- 00007 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 00008 * and Lawrence Berkeley National Lab. 00009 * October 15, 2003 00010 * 00011 */ 00012 /* 00013 Copyright (c) 1994 by Xerox Corporation. All rights reserved. 00014 00015 THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY 00016 EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 00017 00018 Permission is hereby granted to use or copy this program for any 00019 purpose, provided the above notices are retained on all copies. 00020 Permission to modify the code and to distribute modified code is 00021 granted, provided the above notices are retained, and a notice that 00022 the code was modified is included with the above copyright notice. 00023 */ 00024 00025 #include "ssp_defs.h" 00026 00027 void slsolve(int, int, float*, float*); 00028 void smatvec(int, int, int, float*, float*, float*); 00029 00030 /* 00031 * Performs numeric block updates within the relaxed snode. 00032 */ 00033 int 00034 ssnode_bmod ( 00035 const int jcol, /* in */ 00036 const int fsupc, /* in */ 00037 float *dense, /* in */ 00038 float *tempv, /* working array */ 00039 GlobalLU_t *Glu, /* modified */ 00040 SuperLUStat_t *stat /* output */ 00041 ) 00042 { 00043 #ifdef USE_VENDOR_BLAS 00044 #ifdef _CRAY 00045 _fcd ftcs1 = _cptofcd("L", strlen("L")), 00046 ftcs2 = _cptofcd("N", strlen("N")), 00047 ftcs3 = _cptofcd("U", strlen("U")); 00048 #endif 00049 int incx = 1, incy = 1; 00050 float alpha = -1.0, beta = 1.0; 00051 #endif 00052 00053 int luptr, nsupc, nsupr, nrow; 00054 int isub, irow, i, iptr; 00055 register int ufirst, nextlu; 00056 int *lsub, *xlsub; 00057 float *lusup; 00058 int *xlusup; 00059 flops_t *ops = stat->ops; 00060 00061 lsub = Glu->lsub; 00062 xlsub = Glu->xlsub; 00063 lusup = Glu->lusup; 00064 xlusup = Glu->xlusup; 00065 00066 nextlu = xlusup[jcol]; 00067 00068 /* 00069 * Process the supernodal portion of L\U[*,j] 00070 */ 00071 for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) { 00072 irow = lsub[isub]; 00073 lusup[nextlu] = dense[irow]; 00074 dense[irow] = 0; 00075 ++nextlu; 00076 } 00077 00078 xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */ 00079 00080 if ( fsupc < jcol ) { 00081 00082 luptr = xlusup[fsupc]; 00083 nsupr = xlsub[fsupc+1] - xlsub[fsupc]; 00084 nsupc = jcol - fsupc; /* Excluding jcol */ 00085 ufirst = xlusup[jcol]; /* Points to the beginning of column 00086 jcol in supernode L\U(jsupno). */ 00087 nrow = nsupr - nsupc; 00088 00089 ops[TRSV] += nsupc * (nsupc - 1); 00090 ops[GEMV] += 2 * nrow * nsupc; 00091 00092 #ifdef USE_VENDOR_BLAS 00093 #ifdef _CRAY 00094 STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 00095 &lusup[ufirst], &incx ); 00096 SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 00097 &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); 00098 #else 00099 strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 00100 &lusup[ufirst], &incx ); 00101 sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 00102 &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); 00103 #endif 00104 #else 00105 slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] ); 00106 smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 00107 &lusup[ufirst], &tempv[0] ); 00108 00109 /* Scatter tempv[*] into lusup[*] */ 00110 iptr = ufirst + nsupc; 00111 for (i = 0; i < nrow; i++) { 00112 lusup[iptr++] -= tempv[i]; 00113 tempv[i] = 0.0; 00114 } 00115 #endif 00116 00117 } 00118 00119 return 0; 00120 }