Blender V2.61 - r43446
|
00001 00004 /* 00005 * 00006 * OpenNL: Numerical Library 00007 * Copyright (C) 2004 Bruno Levy 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 2 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00022 * 00023 * If you modify this software, you should include a notice giving the 00024 * name of the person performing the modification, the date of modification, 00025 * and the reason for such modification. 00026 * 00027 * Contact: Bruno Levy 00028 * 00029 * levy@loria.fr 00030 * 00031 * ISA Project 00032 * LORIA, INRIA Lorraine, 00033 * Campus Scientifique, BP 239 00034 * 54506 VANDOEUVRE LES NANCY CEDEX 00035 * FRANCE 00036 * 00037 * Note that the GNU General Public License does not permit incorporating 00038 * the Software into proprietary programs. 00039 */ 00040 00041 #include "ONL_opennl.h" 00042 00043 #include <stdlib.h> 00044 #include <stdio.h> 00045 #include <string.h> 00046 #include <math.h> 00047 00048 #ifdef NL_PARANOID 00049 #ifndef NL_DEBUG 00050 #define NL_DEBUG 00051 #endif 00052 #endif 00053 00054 /* SuperLU includes */ 00055 #include <ssp_defs.h> 00056 #include <util.h> 00057 00058 /************************************************************************************/ 00059 /* Assertions */ 00060 00061 00062 static void __nl_assertion_failed(char* cond, char* file, int line) { 00063 fprintf( 00064 stderr, 00065 "OpenNL assertion failed: %s, file:%s, line:%d\n", 00066 cond,file,line 00067 ); 00068 abort(); 00069 } 00070 00071 static void __nl_range_assertion_failed( 00072 float x, float min_val, float max_val, char* file, int line 00073 ) { 00074 fprintf( 00075 stderr, 00076 "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n", 00077 x, min_val, max_val, file,line 00078 ); 00079 abort(); 00080 } 00081 00082 static void __nl_should_not_have_reached(char* file, int line) { 00083 fprintf( 00084 stderr, 00085 "OpenNL should not have reached this point: file:%s, line:%d\n", 00086 file,line 00087 ); 00088 abort(); 00089 } 00090 00091 00092 #define __nl_assert(x) { \ 00093 if(!(x)) { \ 00094 __nl_assertion_failed(#x,__FILE__, __LINE__); \ 00095 } \ 00096 } 00097 00098 #define __nl_range_assert(x,min_val,max_val) { \ 00099 if(((x) < (min_val)) || ((x) > (max_val))) { \ 00100 __nl_range_assertion_failed(x, min_val, max_val, \ 00101 __FILE__, __LINE__ \ 00102 ); \ 00103 } \ 00104 } 00105 00106 #define __nl_assert_not_reached { \ 00107 __nl_should_not_have_reached(__FILE__, __LINE__); \ 00108 } 00109 00110 #ifdef NL_DEBUG 00111 #define __nl_debug_assert(x) __nl_assert(x) 00112 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val) 00113 #else 00114 #define __nl_debug_assert(x) 00115 #define __nl_debug_range_assert(x,min_val,max_val) 00116 #endif 00117 00118 #ifdef NL_PARANOID 00119 #define __nl_parano_assert(x) __nl_assert(x) 00120 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val) 00121 #else 00122 #define __nl_parano_assert(x) 00123 #define __nl_parano_range_assert(x,min_val,max_val) 00124 #endif 00125 00126 /************************************************************************************/ 00127 /* classic macros */ 00128 00129 #ifndef MIN 00130 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 00131 #endif 00132 00133 #ifndef MAX 00134 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 00135 #endif 00136 00137 /************************************************************************************/ 00138 /* memory management */ 00139 00140 #define __NL_NEW(T) (T*)(calloc(1, sizeof(T))) 00141 #define __NL_NEW_ARRAY(T,NB) (T*)(calloc((NB),sizeof(T))) 00142 #define __NL_RENEW_ARRAY(T,x,NB) (T*)(realloc(x,(NB)*sizeof(T))) 00143 #define __NL_DELETE(x) free(x); x = NULL 00144 #define __NL_DELETE_ARRAY(x) free(x); x = NULL 00145 00146 #define __NL_CLEAR(T, x) memset(x, 0, sizeof(T)) 00147 #define __NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (NB)*sizeof(T)) 00148 00149 /************************************************************************************/ 00150 /* Dynamic arrays for sparse row/columns */ 00151 00152 typedef struct { 00153 NLuint index; 00154 NLfloat value; 00155 } __NLCoeff; 00156 00157 typedef struct { 00158 NLuint size; 00159 NLuint capacity; 00160 __NLCoeff* coeff; 00161 } __NLRowColumn; 00162 00163 static void __nlRowColumnConstruct(__NLRowColumn* c) { 00164 c->size = 0; 00165 c->capacity = 0; 00166 c->coeff = NULL; 00167 } 00168 00169 static void __nlRowColumnDestroy(__NLRowColumn* c) { 00170 __NL_DELETE_ARRAY(c->coeff); 00171 #ifdef NL_PARANOID 00172 __NL_CLEAR(__NLRowColumn, c); 00173 #endif 00174 } 00175 00176 static void __nlRowColumnGrow(__NLRowColumn* c) { 00177 if(c->capacity != 0) { 00178 c->capacity = 2 * c->capacity; 00179 c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity); 00180 } else { 00181 c->capacity = 4; 00182 c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity); 00183 } 00184 } 00185 00186 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) { 00187 NLuint i; 00188 for(i=0; i<c->size; i++) { 00189 if(c->coeff[i].index == (NLuint)index) { 00190 c->coeff[i].value += value; 00191 return; 00192 } 00193 } 00194 if(c->size == c->capacity) { 00195 __nlRowColumnGrow(c); 00196 } 00197 c->coeff[c->size].index = index; 00198 c->coeff[c->size].value = value; 00199 c->size++; 00200 } 00201 00202 /* Does not check whether the index already exists */ 00203 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) { 00204 if(c->size == c->capacity) { 00205 __nlRowColumnGrow(c); 00206 } 00207 c->coeff[c->size].index = index; 00208 c->coeff[c->size].value = value; 00209 c->size++; 00210 } 00211 00212 static void __nlRowColumnClear(__NLRowColumn* c) { 00213 c->size = 0; 00214 c->capacity = 0; 00215 __NL_DELETE_ARRAY(c->coeff); 00216 } 00217 00218 /************************************************************************************/ 00219 /* SparseMatrix data structure */ 00220 00221 #define __NL_ROWS 1 00222 #define __NL_COLUMNS 2 00223 #define __NL_SYMMETRIC 4 00224 00225 typedef struct { 00226 NLuint m; 00227 NLuint n; 00228 NLuint diag_size; 00229 NLenum storage; 00230 __NLRowColumn* row; 00231 __NLRowColumn* column; 00232 NLfloat* diag; 00233 } __NLSparseMatrix; 00234 00235 00236 static void __nlSparseMatrixConstruct( 00237 __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage 00238 ) { 00239 NLuint i; 00240 M->m = m; 00241 M->n = n; 00242 M->storage = storage; 00243 if(storage & __NL_ROWS) { 00244 M->row = __NL_NEW_ARRAY(__NLRowColumn, m); 00245 for(i=0; i<m; i++) { 00246 __nlRowColumnConstruct(&(M->row[i])); 00247 } 00248 } else { 00249 M->row = NULL; 00250 } 00251 00252 if(storage & __NL_COLUMNS) { 00253 M->column = __NL_NEW_ARRAY(__NLRowColumn, n); 00254 for(i=0; i<n; i++) { 00255 __nlRowColumnConstruct(&(M->column[i])); 00256 } 00257 } else { 00258 M->column = NULL; 00259 } 00260 00261 M->diag_size = MIN(m,n); 00262 M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size); 00263 } 00264 00265 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) { 00266 NLuint i; 00267 __NL_DELETE_ARRAY(M->diag); 00268 if(M->storage & __NL_ROWS) { 00269 for(i=0; i<M->m; i++) { 00270 __nlRowColumnDestroy(&(M->row[i])); 00271 } 00272 __NL_DELETE_ARRAY(M->row); 00273 } 00274 if(M->storage & __NL_COLUMNS) { 00275 for(i=0; i<M->n; i++) { 00276 __nlRowColumnDestroy(&(M->column[i])); 00277 } 00278 __NL_DELETE_ARRAY(M->column); 00279 } 00280 #ifdef NL_PARANOID 00281 __NL_CLEAR(__NLSparseMatrix,M); 00282 #endif 00283 } 00284 00285 static void __nlSparseMatrixAdd( 00286 __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value 00287 ) { 00288 __nl_parano_range_assert(i, 0, M->m - 1); 00289 __nl_parano_range_assert(j, 0, M->n - 1); 00290 if((M->storage & __NL_SYMMETRIC) && (j > i)) { 00291 return; 00292 } 00293 if(i == j) { 00294 M->diag[i] += value; 00295 } 00296 if(M->storage & __NL_ROWS) { 00297 __nlRowColumnAdd(&(M->row[i]), j, value); 00298 } 00299 if(M->storage & __NL_COLUMNS) { 00300 __nlRowColumnAdd(&(M->column[j]), i, value); 00301 } 00302 } 00303 00304 static void __nlSparseMatrixClear( __NLSparseMatrix* M) { 00305 NLuint i; 00306 if(M->storage & __NL_ROWS) { 00307 for(i=0; i<M->m; i++) { 00308 __nlRowColumnClear(&(M->row[i])); 00309 } 00310 } 00311 if(M->storage & __NL_COLUMNS) { 00312 for(i=0; i<M->n; i++) { 00313 __nlRowColumnClear(&(M->column[i])); 00314 } 00315 } 00316 __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size); 00317 } 00318 00319 /* Returns the number of non-zero coefficients */ 00320 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) { 00321 NLuint nnz = 0; 00322 NLuint i; 00323 if(M->storage & __NL_ROWS) { 00324 for(i = 0; i<M->m; i++) { 00325 nnz += M->row[i].size; 00326 } 00327 } else if (M->storage & __NL_COLUMNS) { 00328 for(i = 0; i<M->n; i++) { 00329 nnz += M->column[i].size; 00330 } 00331 } else { 00332 __nl_assert_not_reached; 00333 } 00334 return nnz; 00335 } 00336 00337 /************************************************************************************/ 00338 /* SparseMatrix x Vector routines, internal helper routines */ 00339 00340 static void __nlSparseMatrix_mult_rows_symmetric( 00341 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00342 ) { 00343 NLuint m = A->m; 00344 NLuint i,ij; 00345 __NLRowColumn* Ri = NULL; 00346 __NLCoeff* c = NULL; 00347 for(i=0; i<m; i++) { 00348 y[i] = 0; 00349 Ri = &(A->row[i]); 00350 for(ij=0; ij<Ri->size; ij++) { 00351 c = &(Ri->coeff[ij]); 00352 y[i] += c->value * x[c->index]; 00353 if(i != c->index) { 00354 y[c->index] += c->value * x[i]; 00355 } 00356 } 00357 } 00358 } 00359 00360 static void __nlSparseMatrix_mult_rows( 00361 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00362 ) { 00363 NLuint m = A->m; 00364 NLuint i,ij; 00365 __NLRowColumn* Ri = NULL; 00366 __NLCoeff* c = NULL; 00367 for(i=0; i<m; i++) { 00368 y[i] = 0; 00369 Ri = &(A->row[i]); 00370 for(ij=0; ij<Ri->size; ij++) { 00371 c = &(Ri->coeff[ij]); 00372 y[i] += c->value * x[c->index]; 00373 } 00374 } 00375 } 00376 00377 static void __nlSparseMatrix_mult_cols_symmetric( 00378 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00379 ) { 00380 NLuint n = A->n; 00381 NLuint j,ii; 00382 __NLRowColumn* Cj = NULL; 00383 __NLCoeff* c = NULL; 00384 for(j=0; j<n; j++) { 00385 y[j] = 0; 00386 Cj = &(A->column[j]); 00387 for(ii=0; ii<Cj->size; ii++) { 00388 c = &(Cj->coeff[ii]); 00389 y[c->index] += c->value * x[j]; 00390 if(j != c->index) { 00391 y[j] += c->value * x[c->index]; 00392 } 00393 } 00394 } 00395 } 00396 00397 static void __nlSparseMatrix_mult_cols( 00398 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00399 ) { 00400 NLuint n = A->n; 00401 NLuint j,ii; 00402 __NLRowColumn* Cj = NULL; 00403 __NLCoeff* c = NULL; 00404 __NL_CLEAR_ARRAY(NLfloat, y, A->m); 00405 for(j=0; j<n; j++) { 00406 Cj = &(A->column[j]); 00407 for(ii=0; ii<Cj->size; ii++) { 00408 c = &(Cj->coeff[ii]); 00409 y[c->index] += c->value * x[j]; 00410 } 00411 } 00412 } 00413 00414 /************************************************************************************/ 00415 /* SparseMatrix x Vector routines, main driver routine */ 00416 00417 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) { 00418 if(A->storage & __NL_ROWS) { 00419 if(A->storage & __NL_SYMMETRIC) { 00420 __nlSparseMatrix_mult_rows_symmetric(A, x, y); 00421 } else { 00422 __nlSparseMatrix_mult_rows(A, x, y); 00423 } 00424 } else { 00425 if(A->storage & __NL_SYMMETRIC) { 00426 __nlSparseMatrix_mult_cols_symmetric(A, x, y); 00427 } else { 00428 __nlSparseMatrix_mult_cols(A, x, y); 00429 } 00430 } 00431 } 00432 00433 /* ****************** Routines for least squares ******************* */ 00434 00435 static void __nlSparseMatrix_square( 00436 __NLSparseMatrix* AtA, __NLSparseMatrix *A 00437 ) { 00438 NLuint m = A->m; 00439 NLuint n = A->n; 00440 NLuint i, j0, j1; 00441 __NLRowColumn *Ri = NULL; 00442 __NLCoeff *c0 = NULL, *c1 = NULL; 00443 float value; 00444 00445 __nlSparseMatrixConstruct(AtA, n, n, A->storage); 00446 00447 for(i=0; i<m; i++) { 00448 Ri = &(A->row[i]); 00449 00450 for(j0=0; j0<Ri->size; j0++) { 00451 c0 = &(Ri->coeff[j0]); 00452 for(j1=0; j1<Ri->size; j1++) { 00453 c1 = &(Ri->coeff[j1]); 00454 00455 value = c0->value*c1->value; 00456 __nlSparseMatrixAdd(AtA, c0->index, c1->index, value); 00457 } 00458 } 00459 } 00460 } 00461 00462 static void __nlSparseMatrix_transpose_mult_rows( 00463 __NLSparseMatrix* A, NLfloat* x, NLfloat* y 00464 ) { 00465 NLuint m = A->m; 00466 NLuint n = A->n; 00467 NLuint i,ij; 00468 __NLRowColumn* Ri = NULL; 00469 __NLCoeff* c = NULL; 00470 00471 __NL_CLEAR_ARRAY(NLfloat, y, n); 00472 00473 for(i=0; i<m; i++) { 00474 Ri = &(A->row[i]); 00475 for(ij=0; ij<Ri->size; ij++) { 00476 c = &(Ri->coeff[ij]); 00477 y[c->index] += c->value * x[i]; 00478 } 00479 } 00480 } 00481 00482 /************************************************************************************/ 00483 /* NLContext data structure */ 00484 00485 typedef void(*__NLMatrixFunc)(float* x, float* y); 00486 00487 typedef struct { 00488 NLfloat value[4]; 00489 NLboolean locked; 00490 NLuint index; 00491 __NLRowColumn *a; 00492 } __NLVariable; 00493 00494 #define __NL_STATE_INITIAL 0 00495 #define __NL_STATE_SYSTEM 1 00496 #define __NL_STATE_MATRIX 2 00497 #define __NL_STATE_MATRIX_CONSTRUCTED 3 00498 #define __NL_STATE_SYSTEM_CONSTRUCTED 4 00499 #define __NL_STATE_SYSTEM_SOLVED 5 00500 00501 typedef struct { 00502 NLenum state; 00503 NLuint n; 00504 NLuint m; 00505 __NLVariable* variable; 00506 NLfloat* b; 00507 NLfloat* Mtb; 00508 __NLSparseMatrix M; 00509 __NLSparseMatrix MtM; 00510 NLfloat* x; 00511 NLuint nb_variables; 00512 NLuint nb_rows; 00513 NLboolean least_squares; 00514 NLboolean symmetric; 00515 NLuint nb_rhs; 00516 NLboolean solve_again; 00517 NLboolean alloc_M; 00518 NLboolean alloc_MtM; 00519 NLboolean alloc_variable; 00520 NLboolean alloc_x; 00521 NLboolean alloc_b; 00522 NLboolean alloc_Mtb; 00523 NLfloat error; 00524 __NLMatrixFunc matrix_vector_prod; 00525 00526 struct __NLSuperLUContext { 00527 NLboolean alloc_slu; 00528 SuperMatrix L, U; 00529 NLint *perm_c, *perm_r; 00530 SuperLUStat_t stat; 00531 } slu; 00532 } __NLContext; 00533 00534 static __NLContext* __nlCurrentContext = NULL; 00535 00536 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) { 00537 __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y); 00538 } 00539 00540 00541 NLContext nlNewContext(void) { 00542 __NLContext* result = __NL_NEW(__NLContext); 00543 result->state = __NL_STATE_INITIAL; 00544 result->matrix_vector_prod = __nlMatrixVectorProd_default; 00545 result->nb_rhs = 1; 00546 nlMakeCurrent(result); 00547 return result; 00548 } 00549 00550 static void __nlFree_SUPERLU(__NLContext *context); 00551 00552 void nlDeleteContext(NLContext context_in) { 00553 __NLContext* context = (__NLContext*)(context_in); 00554 int i; 00555 00556 if(__nlCurrentContext == context) { 00557 __nlCurrentContext = NULL; 00558 } 00559 if(context->alloc_M) { 00560 __nlSparseMatrixDestroy(&context->M); 00561 } 00562 if(context->alloc_MtM) { 00563 __nlSparseMatrixDestroy(&context->MtM); 00564 } 00565 if(context->alloc_variable) { 00566 for(i=0; i<context->nb_variables; i++) { 00567 if(context->variable[i].a) { 00568 __nlRowColumnDestroy(context->variable[i].a); 00569 __NL_DELETE(context->variable[i].a); 00570 } 00571 } 00572 } 00573 if(context->alloc_b) { 00574 __NL_DELETE_ARRAY(context->b); 00575 } 00576 if(context->alloc_Mtb) { 00577 __NL_DELETE_ARRAY(context->Mtb); 00578 } 00579 if(context->alloc_x) { 00580 __NL_DELETE_ARRAY(context->x); 00581 } 00582 if (context->slu.alloc_slu) { 00583 __nlFree_SUPERLU(context); 00584 } 00585 00586 #ifdef NL_PARANOID 00587 __NL_CLEAR(__NLContext, context); 00588 #endif 00589 __NL_DELETE(context); 00590 } 00591 00592 void nlMakeCurrent(NLContext context) { 00593 __nlCurrentContext = (__NLContext*)(context); 00594 } 00595 00596 NLContext nlGetCurrent(void) { 00597 return __nlCurrentContext; 00598 } 00599 00600 static void __nlCheckState(NLenum state) { 00601 __nl_assert(__nlCurrentContext->state == state); 00602 } 00603 00604 static void __nlTransition(NLenum from_state, NLenum to_state) { 00605 __nlCheckState(from_state); 00606 __nlCurrentContext->state = to_state; 00607 } 00608 00609 /************************************************************************************/ 00610 /* Get/Set parameters */ 00611 00612 void nlSolverParameterf(NLenum pname, NLfloat param) { 00613 __nlCheckState(__NL_STATE_INITIAL); 00614 switch(pname) { 00615 case NL_NB_VARIABLES: { 00616 __nl_assert(param > 0); 00617 __nlCurrentContext->nb_variables = (NLuint)param; 00618 } break; 00619 case NL_NB_ROWS: { 00620 __nl_assert(param > 0); 00621 __nlCurrentContext->nb_rows = (NLuint)param; 00622 } break; 00623 case NL_LEAST_SQUARES: { 00624 __nlCurrentContext->least_squares = (NLboolean)param; 00625 } break; 00626 case NL_SYMMETRIC: { 00627 __nlCurrentContext->symmetric = (NLboolean)param; 00628 } break; 00629 case NL_NB_RIGHT_HAND_SIDES: { 00630 __nlCurrentContext->nb_rhs = (NLuint)param; 00631 } break; 00632 default: { 00633 __nl_assert_not_reached; 00634 } break; 00635 } 00636 } 00637 00638 void nlSolverParameteri(NLenum pname, NLint param) { 00639 __nlCheckState(__NL_STATE_INITIAL); 00640 switch(pname) { 00641 case NL_NB_VARIABLES: { 00642 __nl_assert(param > 0); 00643 __nlCurrentContext->nb_variables = (NLuint)param; 00644 } break; 00645 case NL_NB_ROWS: { 00646 __nl_assert(param > 0); 00647 __nlCurrentContext->nb_rows = (NLuint)param; 00648 } break; 00649 case NL_LEAST_SQUARES: { 00650 __nlCurrentContext->least_squares = (NLboolean)param; 00651 } break; 00652 case NL_SYMMETRIC: { 00653 __nlCurrentContext->symmetric = (NLboolean)param; 00654 } break; 00655 case NL_NB_RIGHT_HAND_SIDES: { 00656 __nlCurrentContext->nb_rhs = (NLuint)param; 00657 } break; 00658 default: { 00659 __nl_assert_not_reached; 00660 } break; 00661 } 00662 } 00663 00664 void nlGetBooleanv(NLenum pname, NLboolean* params) { 00665 switch(pname) { 00666 case NL_LEAST_SQUARES: { 00667 *params = __nlCurrentContext->least_squares; 00668 } break; 00669 case NL_SYMMETRIC: { 00670 *params = __nlCurrentContext->symmetric; 00671 } break; 00672 default: { 00673 __nl_assert_not_reached; 00674 } break; 00675 } 00676 } 00677 00678 void nlGetFloatv(NLenum pname, NLfloat* params) { 00679 switch(pname) { 00680 case NL_NB_VARIABLES: { 00681 *params = (NLfloat)(__nlCurrentContext->nb_variables); 00682 } break; 00683 case NL_NB_ROWS: { 00684 *params = (NLfloat)(__nlCurrentContext->nb_rows); 00685 } break; 00686 case NL_LEAST_SQUARES: { 00687 *params = (NLfloat)(__nlCurrentContext->least_squares); 00688 } break; 00689 case NL_SYMMETRIC: { 00690 *params = (NLfloat)(__nlCurrentContext->symmetric); 00691 } break; 00692 case NL_ERROR: { 00693 *params = (NLfloat)(__nlCurrentContext->error); 00694 } break; 00695 default: { 00696 __nl_assert_not_reached; 00697 } break; 00698 } 00699 } 00700 00701 void nlGetIntergerv(NLenum pname, NLint* params) { 00702 switch(pname) { 00703 case NL_NB_VARIABLES: { 00704 *params = (NLint)(__nlCurrentContext->nb_variables); 00705 } break; 00706 case NL_NB_ROWS: { 00707 *params = (NLint)(__nlCurrentContext->nb_rows); 00708 } break; 00709 case NL_LEAST_SQUARES: { 00710 *params = (NLint)(__nlCurrentContext->least_squares); 00711 } break; 00712 case NL_SYMMETRIC: { 00713 *params = (NLint)(__nlCurrentContext->symmetric); 00714 } break; 00715 default: { 00716 __nl_assert_not_reached; 00717 } break; 00718 } 00719 } 00720 00721 /************************************************************************************/ 00722 /* Enable / Disable */ 00723 00724 void nlEnable(NLenum pname) { 00725 switch(pname) { 00726 default: { 00727 __nl_assert_not_reached; 00728 } 00729 } 00730 } 00731 00732 void nlDisable(NLenum pname) { 00733 switch(pname) { 00734 default: { 00735 __nl_assert_not_reached; 00736 } 00737 } 00738 } 00739 00740 NLboolean nlIsEnabled(NLenum pname) { 00741 switch(pname) { 00742 default: { 00743 __nl_assert_not_reached; 00744 } 00745 } 00746 return NL_FALSE; 00747 } 00748 00749 /************************************************************************************/ 00750 /* Get/Set Lock/Unlock variables */ 00751 00752 void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) { 00753 __nlCheckState(__NL_STATE_SYSTEM); 00754 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00755 __nlCurrentContext->variable[index].value[rhsindex] = value; 00756 } 00757 00758 NLfloat nlGetVariable(NLuint rhsindex, NLuint index) { 00759 __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL); 00760 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00761 return __nlCurrentContext->variable[index].value[rhsindex]; 00762 } 00763 00764 void nlLockVariable(NLuint index) { 00765 __nlCheckState(__NL_STATE_SYSTEM); 00766 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00767 __nlCurrentContext->variable[index].locked = NL_TRUE; 00768 } 00769 00770 void nlUnlockVariable(NLuint index) { 00771 __nlCheckState(__NL_STATE_SYSTEM); 00772 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00773 __nlCurrentContext->variable[index].locked = NL_FALSE; 00774 } 00775 00776 NLboolean nlVariableIsLocked(NLuint index) { 00777 __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL); 00778 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1); 00779 return __nlCurrentContext->variable[index].locked; 00780 } 00781 00782 /************************************************************************************/ 00783 /* System construction */ 00784 00785 static void __nlVariablesToVector() { 00786 __NLContext *context = __nlCurrentContext; 00787 NLuint i, j, nb_rhs; 00788 00789 __nl_assert(context->alloc_x); 00790 __nl_assert(context->alloc_variable); 00791 00792 nb_rhs= context->nb_rhs; 00793 00794 for(i=0; i<context->nb_variables; i++) { 00795 __NLVariable* v = &(context->variable[i]); 00796 if(!v->locked) { 00797 __nl_assert(v->index < context->n); 00798 00799 for(j=0; j<nb_rhs; j++) 00800 context->x[context->n*j + v->index] = v->value[j]; 00801 } 00802 } 00803 } 00804 00805 static void __nlVectorToVariables() { 00806 __NLContext *context = __nlCurrentContext; 00807 NLuint i, j, nb_rhs; 00808 00809 __nl_assert(context->alloc_x); 00810 __nl_assert(context->alloc_variable); 00811 00812 nb_rhs= context->nb_rhs; 00813 00814 for(i=0; i<context->nb_variables; i++) { 00815 __NLVariable* v = &(context->variable[i]); 00816 if(!v->locked) { 00817 __nl_assert(v->index < context->n); 00818 00819 for(j=0; j<nb_rhs; j++) 00820 v->value[j] = context->x[context->n*j + v->index]; 00821 } 00822 } 00823 } 00824 00825 static void __nlBeginSystem() { 00826 __nl_assert(__nlCurrentContext->nb_variables > 0); 00827 00828 if (__nlCurrentContext->solve_again) 00829 __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM); 00830 else { 00831 __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM); 00832 00833 __nlCurrentContext->variable = __NL_NEW_ARRAY( 00834 __NLVariable, __nlCurrentContext->nb_variables); 00835 00836 __nlCurrentContext->alloc_variable = NL_TRUE; 00837 } 00838 } 00839 00840 static void __nlEndSystem() { 00841 __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED); 00842 } 00843 00844 static void __nlBeginMatrix() { 00845 NLuint i; 00846 NLuint m = 0, n = 0; 00847 NLenum storage = __NL_ROWS; 00848 __NLContext *context = __nlCurrentContext; 00849 00850 __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX); 00851 00852 if (!context->solve_again) { 00853 for(i=0; i<context->nb_variables; i++) { 00854 if(context->variable[i].locked) { 00855 context->variable[i].index = ~0; 00856 context->variable[i].a = __NL_NEW(__NLRowColumn); 00857 __nlRowColumnConstruct(context->variable[i].a); 00858 } 00859 else 00860 context->variable[i].index = n++; 00861 } 00862 00863 m = (context->nb_rows == 0)? n: context->nb_rows; 00864 00865 context->m = m; 00866 context->n = n; 00867 00868 __nlSparseMatrixConstruct(&context->M, m, n, storage); 00869 context->alloc_M = NL_TRUE; 00870 00871 context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs); 00872 context->alloc_b = NL_TRUE; 00873 00874 context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs); 00875 context->alloc_x = NL_TRUE; 00876 } 00877 else { 00878 /* need to recompute b only, A is not constructed anymore */ 00879 __NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs); 00880 } 00881 00882 __nlVariablesToVector(); 00883 } 00884 00885 static void __nlEndMatrixRHS(NLuint rhs) { 00886 __NLContext *context = __nlCurrentContext; 00887 __NLVariable *variable; 00888 __NLRowColumn *a; 00889 NLfloat *b, *Mtb; 00890 NLuint i, j; 00891 00892 b = context->b + context->m*rhs; 00893 Mtb = context->Mtb + context->n*rhs; 00894 00895 for(i=0; i<__nlCurrentContext->nb_variables; i++) { 00896 variable = &(context->variable[i]); 00897 00898 if(variable->locked) { 00899 a = variable->a; 00900 00901 for(j=0; j<a->size; j++) { 00902 b[a->coeff[j].index] -= a->coeff[j].value*variable->value[rhs]; 00903 } 00904 } 00905 } 00906 00907 if(context->least_squares) 00908 __nlSparseMatrix_transpose_mult_rows(&context->M, b, Mtb); 00909 } 00910 00911 static void __nlEndMatrix() { 00912 __NLContext *context = __nlCurrentContext; 00913 NLuint i; 00914 00915 __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED); 00916 00917 if(context->least_squares) { 00918 if(!__nlCurrentContext->solve_again) { 00919 __nlSparseMatrix_square(&context->MtM, &context->M); 00920 context->alloc_MtM = NL_TRUE; 00921 00922 context->Mtb = 00923 __NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs); 00924 context->alloc_Mtb = NL_TRUE; 00925 } 00926 } 00927 00928 for(i=0; i<context->nb_rhs; i++) 00929 __nlEndMatrixRHS(i); 00930 } 00931 00932 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value) 00933 { 00934 __NLContext *context = __nlCurrentContext; 00935 00936 __nlCheckState(__NL_STATE_MATRIX); 00937 00938 if(context->solve_again) 00939 return; 00940 00941 if (!context->least_squares && context->variable[row].locked); 00942 else if (context->variable[col].locked) { 00943 if(!context->least_squares) 00944 row = context->variable[row].index; 00945 __nlRowColumnAppend(context->variable[col].a, row, value); 00946 } 00947 else { 00948 __NLSparseMatrix* M = &context->M; 00949 00950 if(!context->least_squares) 00951 row = context->variable[row].index; 00952 col = context->variable[col].index; 00953 00954 __nl_range_assert(row, 0, context->m - 1); 00955 __nl_range_assert(col, 0, context->n - 1); 00956 00957 __nlSparseMatrixAdd(M, row, col, value); 00958 } 00959 } 00960 00961 void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value) 00962 { 00963 __NLContext *context = __nlCurrentContext; 00964 NLfloat* b = context->b; 00965 00966 __nlCheckState(__NL_STATE_MATRIX); 00967 00968 if(context->least_squares) { 00969 __nl_range_assert(index, 0, context->m - 1); 00970 b[rhsindex*context->m + index] += value; 00971 } 00972 else { 00973 if(!context->variable[index].locked) { 00974 index = context->variable[index].index; 00975 __nl_range_assert(index, 0, context->m - 1); 00976 00977 b[rhsindex*context->m + index] += value; 00978 } 00979 } 00980 } 00981 00982 void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value) 00983 { 00984 __NLContext *context = __nlCurrentContext; 00985 NLfloat* b = context->b; 00986 00987 __nlCheckState(__NL_STATE_MATRIX); 00988 00989 if(context->least_squares) { 00990 __nl_range_assert(index, 0, context->m - 1); 00991 b[rhsindex*context->m + index] = value; 00992 } 00993 else { 00994 if(!context->variable[index].locked) { 00995 index = context->variable[index].index; 00996 __nl_range_assert(index, 0, context->m - 1); 00997 00998 b[rhsindex*context->m + index] = value; 00999 } 01000 } 01001 } 01002 01003 void nlBegin(NLenum prim) { 01004 switch(prim) { 01005 case NL_SYSTEM: { 01006 __nlBeginSystem(); 01007 } break; 01008 case NL_MATRIX: { 01009 __nlBeginMatrix(); 01010 } break; 01011 default: { 01012 __nl_assert_not_reached; 01013 } 01014 } 01015 } 01016 01017 void nlEnd(NLenum prim) { 01018 switch(prim) { 01019 case NL_SYSTEM: { 01020 __nlEndSystem(); 01021 } break; 01022 case NL_MATRIX: { 01023 __nlEndMatrix(); 01024 } break; 01025 default: { 01026 __nl_assert_not_reached; 01027 } 01028 } 01029 } 01030 01031 /************************************************************************/ 01032 /* SuperLU wrapper */ 01033 01034 /* Note: SuperLU is difficult to call, but it is worth it. */ 01035 /* Here is a driver inspired by A. Sheffer's "cow flattener". */ 01036 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) { 01037 01038 /* OpenNL Context */ 01039 __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M; 01040 NLuint n = context->n; 01041 NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */ 01042 01043 /* Compressed Row Storage matrix representation */ 01044 NLint *xa = __NL_NEW_ARRAY(NLint, n+1); 01045 NLfloat *rhs = __NL_NEW_ARRAY(NLfloat, n); 01046 NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz); 01047 NLint *asub = __NL_NEW_ARRAY(NLint, nnz); 01048 NLint *etree = __NL_NEW_ARRAY(NLint, n); 01049 01050 /* SuperLU variables */ 01051 SuperMatrix At, AtP; 01052 NLint info, panel_size, relax; 01053 superlu_options_t options; 01054 01055 /* Temporary variables */ 01056 NLuint i, jj, count; 01057 01058 __nl_assert(!(M->storage & __NL_SYMMETRIC)); 01059 __nl_assert(M->storage & __NL_ROWS); 01060 __nl_assert(M->m == M->n); 01061 01062 /* Convert M to compressed column format */ 01063 for(i=0, count=0; i<n; i++) { 01064 __NLRowColumn *Ri = M->row + i; 01065 xa[i] = count; 01066 01067 for(jj=0; jj<Ri->size; jj++, count++) { 01068 a[count] = Ri->coeff[jj].value; 01069 asub[count] = Ri->coeff[jj].index; 01070 } 01071 } 01072 xa[n] = nnz; 01073 01074 /* Free M, don't need it anymore at this point */ 01075 __nlSparseMatrixClear(M); 01076 01077 /* Create superlu A matrix transposed */ 01078 sCreate_CompCol_Matrix( 01079 &At, n, n, nnz, a, asub, xa, 01080 SLU_NC, /* Colum wise, no supernode */ 01081 SLU_S, /* floats */ 01082 SLU_GE /* general storage */ 01083 ); 01084 01085 /* Set superlu options */ 01086 set_default_options(&options); 01087 options.ColPerm = MY_PERMC; 01088 options.Fact = DOFACT; 01089 01090 StatInit(&(context->slu.stat)); 01091 01092 panel_size = sp_ienv(1); /* sp_ienv give us the defaults */ 01093 relax = sp_ienv(2); 01094 01095 /* Compute permutation and permuted matrix */ 01096 context->slu.perm_r = __NL_NEW_ARRAY(NLint, n); 01097 context->slu.perm_c = __NL_NEW_ARRAY(NLint, n); 01098 01099 if ((permutation == NULL) || (*permutation == -1)) { 01100 get_perm_c(3, &At, context->slu.perm_c); 01101 01102 if (permutation) 01103 memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n); 01104 } 01105 else 01106 memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n); 01107 01108 sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP); 01109 01110 /* Decompose into L and U */ 01111 sgstrf(&options, &AtP, relax, panel_size, 01112 etree, NULL, 0, context->slu.perm_c, context->slu.perm_r, 01113 &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info); 01114 01115 /* Cleanup */ 01116 01117 Destroy_SuperMatrix_Store(&At); 01118 Destroy_CompCol_Permuted(&AtP); 01119 01120 __NL_DELETE_ARRAY(etree); 01121 __NL_DELETE_ARRAY(xa); 01122 __NL_DELETE_ARRAY(rhs); 01123 __NL_DELETE_ARRAY(a); 01124 __NL_DELETE_ARRAY(asub); 01125 01126 context->slu.alloc_slu = NL_TRUE; 01127 01128 return (info == 0); 01129 } 01130 01131 static NLboolean __nlInvert_SUPERLU(__NLContext *context) { 01132 01133 /* OpenNL Context */ 01134 NLfloat* b = (context->least_squares)? context->Mtb: context->b; 01135 NLfloat* x = context->x; 01136 NLuint n = context->n, j; 01137 01138 /* SuperLU variables */ 01139 SuperMatrix B; 01140 NLint info = 0; 01141 01142 for(j=0; j<context->nb_rhs; j++, b+=n, x+=n) { 01143 /* Create superlu array for B */ 01144 sCreate_Dense_Matrix( 01145 &B, n, 1, b, n, 01146 SLU_DN, /* Fortran-type column-wise storage */ 01147 SLU_S, /* floats */ 01148 SLU_GE /* general */ 01149 ); 01150 01151 /* Forward/Back substitution to compute x */ 01152 sgstrs(TRANS, &(context->slu.L), &(context->slu.U), 01153 context->slu.perm_c, context->slu.perm_r, &B, 01154 &(context->slu.stat), &info); 01155 01156 if(info == 0) 01157 memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n); 01158 01159 Destroy_SuperMatrix_Store(&B); 01160 } 01161 01162 return (info == 0); 01163 } 01164 01165 static void __nlFree_SUPERLU(__NLContext *context) { 01166 01167 Destroy_SuperNode_Matrix(&(context->slu.L)); 01168 Destroy_CompCol_Matrix(&(context->slu.U)); 01169 01170 StatFree(&(context->slu.stat)); 01171 01172 __NL_DELETE_ARRAY(context->slu.perm_r); 01173 __NL_DELETE_ARRAY(context->slu.perm_c); 01174 01175 context->slu.alloc_slu = NL_FALSE; 01176 } 01177 01178 void nlPrintMatrix(void) { 01179 __NLContext *context = __nlCurrentContext; 01180 __NLSparseMatrix* M = &(context->M); 01181 __NLSparseMatrix* MtM = &(context->MtM); 01182 float *b = context->b; 01183 NLuint i, jj, k; 01184 NLuint m = context->m; 01185 NLuint n = context->n; 01186 __NLRowColumn* Ri = NULL; 01187 float *value = malloc(sizeof(*value)*(n+m)); 01188 01189 printf("A:\n"); 01190 for(i=0; i<m; i++) { 01191 Ri = &(M->row[i]); 01192 01193 memset(value, 0.0, sizeof(*value)*n); 01194 for(jj=0; jj<Ri->size; jj++) 01195 value[Ri->coeff[jj].index] = Ri->coeff[jj].value; 01196 01197 for (k = 0; k<n; k++) 01198 printf("%.3f ", value[k]); 01199 printf("\n"); 01200 } 01201 01202 for(k=0; k<context->nb_rhs; k++) { 01203 printf("b (%d):\n", k); 01204 for(i=0; i<n; i++) 01205 printf("%f ", b[context->n*k + i]); 01206 printf("\n"); 01207 } 01208 01209 if(context->alloc_MtM) { 01210 printf("AtA:\n"); 01211 for(i=0; i<n; i++) { 01212 Ri = &(MtM->row[i]); 01213 01214 memset(value, 0.0, sizeof(*value)*m); 01215 for(jj=0; jj<Ri->size; jj++) 01216 value[Ri->coeff[jj].index] = Ri->coeff[jj].value; 01217 01218 for (k = 0; k<n; k++) 01219 printf("%.3f ", value[k]); 01220 printf("\n"); 01221 } 01222 01223 for(k=0; k<context->nb_rhs; k++) { 01224 printf("Mtb (%d):\n", k); 01225 for(i=0; i<n; i++) 01226 printf("%f ", context->Mtb[context->n*k + i]); 01227 printf("\n"); 01228 } 01229 printf("\n"); 01230 } 01231 01232 free(value); 01233 } 01234 01235 /************************************************************************/ 01236 /* nlSolve() driver routine */ 01237 01238 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) { 01239 NLboolean result = NL_TRUE; 01240 01241 __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED); 01242 01243 if (!__nlCurrentContext->solve_again) 01244 result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation); 01245 01246 if (result) { 01247 result = __nlInvert_SUPERLU(__nlCurrentContext); 01248 01249 if (result) { 01250 __nlVectorToVariables(); 01251 01252 if (solveAgain) 01253 __nlCurrentContext->solve_again = NL_TRUE; 01254 01255 __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED); 01256 } 01257 } 01258 01259 return result; 01260 } 01261 01262 NLboolean nlSolve() { 01263 return nlSolveAdvanced(NULL, NL_FALSE); 01264 } 01265