Blender: svbvh.h Source File

Blender V2.61 - r43446
00001 /*
00002  * ***** BEGIN GPL LICENSE BLOCK *****
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License
00006  * as published by the Free Software Foundation; either version 2
00007  * of the License, or (at your option) any later version. 
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software Foundation,
00016  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  *
00018  * The Original Code is Copyright (C) 2009 Blender Foundation.
00019  * All rights reserved.
00020  *
00021  * The Original Code is: all of this file.
00022  *
00023  * Contributor(s): André Pinto.
00024  *
00025  * ***** END GPL LICENSE BLOCK *****
00026  */
00027 
00033 #ifdef __SSE__
00034  
00035 #ifndef RE_RAYTRACE_SVBVH_H
00036 #define RE_RAYTRACE_SVBVH_H
00037 
00038 #include "bvh.h"
00039 #include "BLI_memarena.h"
00040 #include "BKE_global.h"
00041 #include <stdio.h>
00042 #include <algorithm>
00043 
00044 struct SVBVHNode
00045 {
00046     float child_bb[24];
00047     SVBVHNode *child[4];
00048     int nchilds;
00049 };
00050 
00051 static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group)
00052 {
00053     const __m128 tmin0 = _mm_setzero_ps();
00054     const __m128 tmax0 = _mm_set_ps1(isec->dist);
00055 
00056     const __m128 start0 = _mm_set_ps1(isec->start[0]);
00057     const __m128 start1 = _mm_set_ps1(isec->start[1]);
00058     const __m128 start2 = _mm_set_ps1(isec->start[2]);
00059     const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0);
00060     const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0);
00061     const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1);
00062     const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1);
00063     const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2);
00064     const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2);
00065     const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]);
00066     const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]);
00067     const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]);
00068     const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0);
00069     const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0);
00070     const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1);
00071     const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1);
00072     const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2);
00073     const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2);
00074     const __m128 tmin1 = _mm_max_ps(tmin0, mul0);
00075     const __m128 tmax1 = _mm_min_ps(tmax0, mul1);
00076     const __m128 tmin2 = _mm_max_ps(tmin1, mul2);
00077     const __m128 tmax2 = _mm_min_ps(tmax1, mul3);
00078     const __m128 tmin3 = _mm_max_ps(tmin2, mul4);
00079     const __m128 tmax3 = _mm_min_ps(tmax2, mul5);
00080     
00081     return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
00082 }
00083 
00084 static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb)
00085 {
00086     const float *bb = _bb;
00087     
00088     float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
00089     float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
00090     float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
00091     float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
00092     float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
00093     float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
00094     
00095     RE_RC_COUNT(isec->raycounter->bb.test);
00096 
00097     if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
00098     if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0;
00099     if(t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;
00100 
00101     RE_RC_COUNT(isec->raycounter->bb.hit);  
00102 
00103     return 1;
00104 }
00105 
00106 static bool svbvh_node_is_leaf(const SVBVHNode *node)
00107 {
00108     return !RE_rayobject_isAligned(node);
00109 }
00110 
00111 template<int MAX_STACK_SIZE, bool SHADOW>
00112 static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec)
00113 {
00114     SVBVHNode *stack[MAX_STACK_SIZE], *node;
00115     int hit = 0, stack_pos = 0;
00116 
00117     stack[stack_pos++] = root;
00118 
00119     while(stack_pos)
00120     {
00121         node = stack[--stack_pos];
00122 
00123         if(!svbvh_node_is_leaf(node))
00124         {
00125             int nchilds= node->nchilds;
00126 
00127             if(nchilds == 4) {
00128                 float *child_bb= node->child_bb;
00129                 int res = svbvh_bb_intersect_test_simd4(isec, ((__m128*) (child_bb)));
00130                 SVBVHNode **child= node->child;
00131 
00132                 RE_RC_COUNT(isec->raycounter->simd_bb.test);
00133 
00134                 if(res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00135                 if(res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00136                 if(res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00137                 if(res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
00138             }
00139             else {
00140                 float *child_bb= node->child_bb;
00141                 SVBVHNode **child= node->child;
00142                 int i;
00143 
00144                 for(i=0; i<nchilds; i++)
00145                     if(svbvh_bb_intersect_test(isec, (float*)child_bb+6*i))
00146                         stack[stack_pos++] = child[i];
00147             }
00148         }
00149         else
00150         {
00151             hit |= RE_rayobject_intersect((RayObject*)node, isec);
00152             if(SHADOW && hit) break;
00153         }
00154     }
00155 
00156     return hit;
00157 }
00158 
00159 
00160 template<>
00161 inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max)
00162 {
00163     if(is_leaf(node))
00164     {
00165         RE_rayobject_merge_bb((RayObject*)node, min, max);
00166     }
00167     else
00168     {
00169         int i=0;
00170         while(i+4 <= node->nchilds)
00171         {
00172             float *res = node->child_bb + 6*i;
00173             for(int j=0; j<3; j++)
00174             {
00175                 min[j] = MIN2(min[j], res[4*j+0]);
00176                 min[j] = MIN2(min[j], res[4*j+1]);
00177                 min[j] = MIN2(min[j], res[4*j+2]);
00178                 min[j] = MIN2(min[j], res[4*j+3]);
00179             }
00180             for(int j=0; j<3; j++)
00181             {
00182                 max[j] = MAX2(max[j], res[4*(j+3)+0]);
00183                 max[j] = MAX2(max[j], res[4*(j+3)+1]);
00184                 max[j] = MAX2(max[j], res[4*(j+3)+2]);
00185                 max[j] = MAX2(max[j], res[4*(j+3)+3]);
00186             }
00187             
00188             i += 4;
00189         }
00190 
00191         for(; i<node->nchilds; i++)
00192         {
00193             DO_MIN(node->child_bb+6*i  , min);
00194             DO_MAX(node->child_bb+3+6*i, max);
00195         }
00196     }
00197 }
00198 
00199 
00200 
00201 /*
00202  * Builds a SVBVH tree form a VBVHTree
00203  */
00204 template<class OldNode>
00205 struct Reorganize_SVBVH
00206 {
00207     MemArena *arena;
00208 
00209     float childs_per_node;
00210     int nodes_with_childs[16];
00211     int useless_bb;
00212     int nodes;
00213 
00214     Reorganize_SVBVH(MemArena *a)
00215     {
00216         arena = a;
00217         nodes = 0;
00218         childs_per_node = 0;
00219         useless_bb = 0;
00220         
00221         for(int i=0; i<16; i++)
00222             nodes_with_childs[i] = 0;
00223     }
00224     
00225     ~Reorganize_SVBVH()
00226     {
00227         if(G.f & G_DEBUG) {
00228             printf("%f childs per node\n", childs_per_node / nodes);
00229             printf("%d childs BB are useless\n", useless_bb);
00230             for(int i=0; i<16; i++)
00231                 printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes,  nodes_with_childs[i]/float(nodes));
00232         }
00233     }
00234     
00235     SVBVHNode *create_node(int nchilds)
00236     {
00237         SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(arena, sizeof(SVBVHNode));
00238         node->nchilds = nchilds;
00239 
00240         return node;
00241     }
00242     
00243     void copy_bb(float *bb, const float *old_bb)
00244     {
00245         std::copy(old_bb, old_bb+6, bb);
00246     }
00247     
00248     void prepare_for_simd(SVBVHNode *node)
00249     {
00250         int i=0;
00251         while(i+4 <= node->nchilds)
00252         {
00253             float vec_tmp[4*6];
00254             float *res = node->child_bb+6*i;
00255             std::copy(res, res+6*4, vec_tmp);
00256             
00257             for(int j=0; j<6; j++)
00258             {
00259                 res[4*j+0] = vec_tmp[6*0+j];
00260                 res[4*j+1] = vec_tmp[6*1+j];
00261                 res[4*j+2] = vec_tmp[6*2+j];
00262                 res[4*j+3] = vec_tmp[6*3+j];
00263             }
00264 
00265             i += 4;
00266         }
00267     }
00268 
00269     /* amt must be power of two */
00270     inline int padup(int num, int amt)
00271     {
00272         return ((num+(amt-1))&~(amt-1));
00273     }
00274     
00275     SVBVHNode *transform(OldNode *old)
00276     {
00277         if(is_leaf(old))
00278             return (SVBVHNode*)old;
00279         if(is_leaf(old->child))
00280             return (SVBVHNode*)old->child;
00281 
00282         int nchilds = count_childs(old);
00283         int alloc_childs = nchilds;
00284         if(nchilds % 4 > 2)
00285             alloc_childs = padup(nchilds, 4);
00286         
00287         SVBVHNode *node = create_node(alloc_childs);
00288 
00289         childs_per_node += nchilds;
00290         nodes++;
00291         if(nchilds < 16)
00292             nodes_with_childs[nchilds]++;
00293         
00294         useless_bb += alloc_childs-nchilds;
00295         while(alloc_childs > nchilds)
00296         {
00297             const static float def_bb[6] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MIN, FLT_MIN, FLT_MIN };
00298             alloc_childs--;
00299             node->child[alloc_childs] = 0;
00300             copy_bb(node->child_bb+alloc_childs*6, def_bb);
00301         }
00302         
00303         int i=nchilds;
00304         for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
00305         {
00306             i--;
00307             node->child[i] = transform(o_child);
00308             if(is_leaf(o_child))
00309             {
00310                 float bb[6];
00311                 INIT_MINMAX(bb, bb+3);
00312                 RE_rayobject_merge_bb((RayObject*)o_child, bb, bb+3);
00313                 copy_bb(node->child_bb+i*6, bb);
00314                 break;
00315             }
00316             else
00317             {
00318                 copy_bb(node->child_bb+i*6, o_child->bb);
00319             }
00320         }
00321         assert(i == 0);
00322 
00323         prepare_for_simd(node);
00324         
00325         return node;
00326     }   
00327 };
00328 
00329 #endif
00330 
00331 #endif //__SSE__
00332