Blender V2.61 - r43446
|
00001 /* 00002 * ***** BEGIN GPL LICENSE BLOCK ***** 00003 * 00004 * This program is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU General Public License 00006 * as published by the Free Software Foundation; either version 2 00007 * of the License, or (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program; if not, write to the Free Software Foundation, 00016 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 * 00018 * The Original Code is Copyright (C) 2009 Blender Foundation. 00019 * All rights reserved. 00020 * 00021 * The Original Code is: all of this file. 00022 * 00023 * Contributor(s): André Pinto. 00024 * 00025 * ***** END GPL LICENSE BLOCK ***** 00026 */ 00027 00033 #ifdef __SSE__ 00034 00035 #ifndef RE_RAYTRACE_SVBVH_H 00036 #define RE_RAYTRACE_SVBVH_H 00037 00038 #include "bvh.h" 00039 #include "BLI_memarena.h" 00040 #include "BKE_global.h" 00041 #include <stdio.h> 00042 #include <algorithm> 00043 00044 struct SVBVHNode 00045 { 00046 float child_bb[24]; 00047 SVBVHNode *child[4]; 00048 int nchilds; 00049 }; 00050 00051 static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group) 00052 { 00053 const __m128 tmin0 = _mm_setzero_ps(); 00054 const __m128 tmax0 = _mm_set_ps1(isec->dist); 00055 00056 const __m128 start0 = _mm_set_ps1(isec->start[0]); 00057 const __m128 start1 = _mm_set_ps1(isec->start[1]); 00058 const __m128 start2 = _mm_set_ps1(isec->start[2]); 00059 const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0); 00060 const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0); 00061 const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1); 00062 const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1); 00063 const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2); 00064 const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2); 00065 const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]); 00066 const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]); 00067 const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]); 00068 const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0); 00069 const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0); 00070 const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1); 00071 const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1); 00072 const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2); 00073 const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2); 00074 const __m128 tmin1 = _mm_max_ps(tmin0, mul0); 00075 const __m128 tmax1 = _mm_min_ps(tmax0, mul1); 00076 const __m128 tmin2 = _mm_max_ps(tmin1, mul2); 00077 const __m128 tmax2 = _mm_min_ps(tmax1, mul3); 00078 const __m128 tmin3 = _mm_max_ps(tmin2, mul4); 00079 const __m128 tmax3 = _mm_min_ps(tmax2, mul5); 00080 00081 return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3)); 00082 } 00083 00084 static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb) 00085 { 00086 const float *bb = _bb; 00087 00088 float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0]; 00089 float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0]; 00090 float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1]; 00091 float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1]; 00092 float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2]; 00093 float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2]; 00094 00095 RE_RC_COUNT(isec->raycounter->bb.test); 00096 00097 if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0; 00098 if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0; 00099 if(t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0; 00100 00101 RE_RC_COUNT(isec->raycounter->bb.hit); 00102 00103 return 1; 00104 } 00105 00106 static bool svbvh_node_is_leaf(const SVBVHNode *node) 00107 { 00108 return !RE_rayobject_isAligned(node); 00109 } 00110 00111 template<int MAX_STACK_SIZE, bool SHADOW> 00112 static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec) 00113 { 00114 SVBVHNode *stack[MAX_STACK_SIZE], *node; 00115 int hit = 0, stack_pos = 0; 00116 00117 stack[stack_pos++] = root; 00118 00119 while(stack_pos) 00120 { 00121 node = stack[--stack_pos]; 00122 00123 if(!svbvh_node_is_leaf(node)) 00124 { 00125 int nchilds= node->nchilds; 00126 00127 if(nchilds == 4) { 00128 float *child_bb= node->child_bb; 00129 int res = svbvh_bb_intersect_test_simd4(isec, ((__m128*) (child_bb))); 00130 SVBVHNode **child= node->child; 00131 00132 RE_RC_COUNT(isec->raycounter->simd_bb.test); 00133 00134 if(res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00135 if(res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00136 if(res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00137 if(res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); } 00138 } 00139 else { 00140 float *child_bb= node->child_bb; 00141 SVBVHNode **child= node->child; 00142 int i; 00143 00144 for(i=0; i<nchilds; i++) 00145 if(svbvh_bb_intersect_test(isec, (float*)child_bb+6*i)) 00146 stack[stack_pos++] = child[i]; 00147 } 00148 } 00149 else 00150 { 00151 hit |= RE_rayobject_intersect((RayObject*)node, isec); 00152 if(SHADOW && hit) break; 00153 } 00154 } 00155 00156 return hit; 00157 } 00158 00159 00160 template<> 00161 inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max) 00162 { 00163 if(is_leaf(node)) 00164 { 00165 RE_rayobject_merge_bb((RayObject*)node, min, max); 00166 } 00167 else 00168 { 00169 int i=0; 00170 while(i+4 <= node->nchilds) 00171 { 00172 float *res = node->child_bb + 6*i; 00173 for(int j=0; j<3; j++) 00174 { 00175 min[j] = MIN2(min[j], res[4*j+0]); 00176 min[j] = MIN2(min[j], res[4*j+1]); 00177 min[j] = MIN2(min[j], res[4*j+2]); 00178 min[j] = MIN2(min[j], res[4*j+3]); 00179 } 00180 for(int j=0; j<3; j++) 00181 { 00182 max[j] = MAX2(max[j], res[4*(j+3)+0]); 00183 max[j] = MAX2(max[j], res[4*(j+3)+1]); 00184 max[j] = MAX2(max[j], res[4*(j+3)+2]); 00185 max[j] = MAX2(max[j], res[4*(j+3)+3]); 00186 } 00187 00188 i += 4; 00189 } 00190 00191 for(; i<node->nchilds; i++) 00192 { 00193 DO_MIN(node->child_bb+6*i , min); 00194 DO_MAX(node->child_bb+3+6*i, max); 00195 } 00196 } 00197 } 00198 00199 00200 00201 /* 00202 * Builds a SVBVH tree form a VBVHTree 00203 */ 00204 template<class OldNode> 00205 struct Reorganize_SVBVH 00206 { 00207 MemArena *arena; 00208 00209 float childs_per_node; 00210 int nodes_with_childs[16]; 00211 int useless_bb; 00212 int nodes; 00213 00214 Reorganize_SVBVH(MemArena *a) 00215 { 00216 arena = a; 00217 nodes = 0; 00218 childs_per_node = 0; 00219 useless_bb = 0; 00220 00221 for(int i=0; i<16; i++) 00222 nodes_with_childs[i] = 0; 00223 } 00224 00225 ~Reorganize_SVBVH() 00226 { 00227 if(G.f & G_DEBUG) { 00228 printf("%f childs per node\n", childs_per_node / nodes); 00229 printf("%d childs BB are useless\n", useless_bb); 00230 for(int i=0; i<16; i++) 00231 printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes, nodes_with_childs[i]/float(nodes)); 00232 } 00233 } 00234 00235 SVBVHNode *create_node(int nchilds) 00236 { 00237 SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(arena, sizeof(SVBVHNode)); 00238 node->nchilds = nchilds; 00239 00240 return node; 00241 } 00242 00243 void copy_bb(float *bb, const float *old_bb) 00244 { 00245 std::copy(old_bb, old_bb+6, bb); 00246 } 00247 00248 void prepare_for_simd(SVBVHNode *node) 00249 { 00250 int i=0; 00251 while(i+4 <= node->nchilds) 00252 { 00253 float vec_tmp[4*6]; 00254 float *res = node->child_bb+6*i; 00255 std::copy(res, res+6*4, vec_tmp); 00256 00257 for(int j=0; j<6; j++) 00258 { 00259 res[4*j+0] = vec_tmp[6*0+j]; 00260 res[4*j+1] = vec_tmp[6*1+j]; 00261 res[4*j+2] = vec_tmp[6*2+j]; 00262 res[4*j+3] = vec_tmp[6*3+j]; 00263 } 00264 00265 i += 4; 00266 } 00267 } 00268 00269 /* amt must be power of two */ 00270 inline int padup(int num, int amt) 00271 { 00272 return ((num+(amt-1))&~(amt-1)); 00273 } 00274 00275 SVBVHNode *transform(OldNode *old) 00276 { 00277 if(is_leaf(old)) 00278 return (SVBVHNode*)old; 00279 if(is_leaf(old->child)) 00280 return (SVBVHNode*)old->child; 00281 00282 int nchilds = count_childs(old); 00283 int alloc_childs = nchilds; 00284 if(nchilds % 4 > 2) 00285 alloc_childs = padup(nchilds, 4); 00286 00287 SVBVHNode *node = create_node(alloc_childs); 00288 00289 childs_per_node += nchilds; 00290 nodes++; 00291 if(nchilds < 16) 00292 nodes_with_childs[nchilds]++; 00293 00294 useless_bb += alloc_childs-nchilds; 00295 while(alloc_childs > nchilds) 00296 { 00297 const static float def_bb[6] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MIN, FLT_MIN, FLT_MIN }; 00298 alloc_childs--; 00299 node->child[alloc_childs] = 0; 00300 copy_bb(node->child_bb+alloc_childs*6, def_bb); 00301 } 00302 00303 int i=nchilds; 00304 for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling) 00305 { 00306 i--; 00307 node->child[i] = transform(o_child); 00308 if(is_leaf(o_child)) 00309 { 00310 float bb[6]; 00311 INIT_MINMAX(bb, bb+3); 00312 RE_rayobject_merge_bb((RayObject*)o_child, bb, bb+3); 00313 copy_bb(node->child_bb+i*6, bb); 00314 break; 00315 } 00316 else 00317 { 00318 copy_bb(node->child_bb+i*6, o_child->bb); 00319 } 00320 } 00321 assert(i == 0); 00322 00323 prepare_for_simd(node); 00324 00325 return node; 00326 } 00327 }; 00328 00329 #endif 00330 00331 #endif //__SSE__ 00332