Blender V2.61 - r43446
|
00001 /* 00002 * Copyright 2011, Blender Foundation. 00003 * 00004 * This program is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU General Public License 00006 * as published by the Free Software Foundation; either version 2 00007 * of the License, or (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program; if not, write to the Free Software Foundation, 00016 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00019 CCL_NAMESPACE_BEGIN 00020 00021 #define MBVH_OBJECT_SENTINEL 0x76543210 00022 #define MBVH_NODE_SIZE 8 00023 #define MBVH_STACK_SIZE 1024 00024 #define MBVH_RAY_STACK_SIZE 10000 00025 00026 typedef struct MBVHTask { 00027 int node; 00028 int index; 00029 int num; 00030 int object; 00031 } MBVHTask; 00032 00033 typedef struct MVBHRay { 00034 float3 P; 00035 float u; 00036 float3 idir; 00037 float v; 00038 float t; 00039 int index; 00040 int object; 00041 00042 float3 origP; 00043 float3 origD; 00044 float tmax; 00045 } MBVHRay; 00046 00047 __device float3 mbvh_inverse_direction(float3 dir) 00048 { 00049 // Avoid divide by zero (ooeps = exp2f(-80.0f)) 00050 float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f; 00051 float3 idir; 00052 00053 idir.x = 1.0f / (fabsf(dir.x) > ooeps ? dir.x : copysignf(ooeps, dir.x)); 00054 idir.y = 1.0f / (fabsf(dir.y) > ooeps ? dir.y : copysignf(ooeps, dir.y)); 00055 idir.z = 1.0f / (fabsf(dir.z) > ooeps ? dir.z : copysignf(ooeps, dir.z)); 00056 00057 return idir; 00058 } 00059 00060 __device void mbvh_instance_push(KernelGlobals *kg, int object, MBVHRay *ray) 00061 { 00062 Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); 00063 00064 ray->P = transform(&tfm, ray->origP); 00065 00066 float3 dir = ray->origD; 00067 00068 if(ray->t != ray->tmax) dir *= ray->t; 00069 00070 dir = transform_direction(&tfm, dir); 00071 ray->idir = mbvh_inverse_direction(normalize(dir)); 00072 00073 if(ray->t != ray->tmax) ray->t = len(dir); 00074 } 00075 00076 __device void mbvh_instance_pop(KernelGlobals *kg, int object, MBVHRay *ray) 00077 { 00078 Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); 00079 00080 if(ray->t != ray->tmax) 00081 ray->t = len(transform_direction(&tfm, (1.0f/(ray->idir)) * (ray->t))); 00082 00083 ray->P = ray->origP; 00084 ray->idir = mbvh_inverse_direction(ray->origD); 00085 } 00086 00087 /* Sven Woop's algorithm */ 00088 __device void mbvh_triangle_intersect(KernelGlobals *kg, MBVHRay *ray, int object, int triAddr) 00089 { 00090 float3 P = ray->P; 00091 float3 idir = ray->idir; 00092 00093 /* compute and check intersection t-value */ 00094 float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+0); 00095 float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+1); 00096 float3 dir = 1.0f/idir; 00097 00098 float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; 00099 float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); 00100 float t = Oz * invDz; 00101 00102 if(t > 0.0f && t < ray->t) { 00103 /* compute and check barycentric u */ 00104 float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; 00105 float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; 00106 float u = Ox + t*Dx; 00107 00108 if(u >= 0.0f) { 00109 /* compute and check barycentric v */ 00110 float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+2); 00111 float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; 00112 float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; 00113 float v = Oy + t*Dy; 00114 00115 if(v >= 0.0f && u + v <= 1.0f) { 00116 /* record intersection */ 00117 ray->index = triAddr; 00118 ray->object = object; 00119 ray->u = u; 00120 ray->v = v; 00121 ray->t = t; 00122 } 00123 } 00124 } 00125 } 00126 00127 __device void mbvh_node_intersect(KernelGlobals *kg, __m128 *traverseChild, 00128 __m128 *tHit, float3 P, float3 idir, float t, int nodeAddr) 00129 { 00130 /* X axis */ 00131 const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+0); 00132 const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x)); 00133 const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+1); 00134 const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x)); 00135 00136 __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps()); 00137 __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t)); 00138 00139 /* Y axis */ 00140 const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+2); 00141 const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y)); 00142 const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+3); 00143 const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y)); 00144 00145 tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin); 00146 tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax); 00147 00148 /* Z axis */ 00149 const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+4); 00150 const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z)); 00151 const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+5); 00152 const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z)); 00153 00154 tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin); 00155 tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax); 00156 00157 /* compare and get mask */ 00158 *traverseChild = _mm_cmple_ps(tmin, tmax); 00159 00160 /* get distance XXX probably wrong */ 00161 *tHit = tmin; 00162 } 00163 00164 static void mbvh_sort_by_length(int id[4], float len[4]) 00165 { 00166 for(int i = 1; i < 4; i++) { 00167 int j = i - 1; 00168 00169 while(j >= 0 && len[j] > len[j+1]) { 00170 swap(len[j], len[j+1]); 00171 swap(id[j], id[j+1]); 00172 j--; 00173 } 00174 } 00175 } 00176 00177 __device void scene_intersect(KernelGlobals *kg, MBVHRay *rays, int numrays) 00178 { 00179 /* traversal stacks */ 00180 MBVHTask task_stack[MBVH_STACK_SIZE]; 00181 int active_ray_stacks[4][MBVH_RAY_STACK_SIZE]; 00182 int num_task, num_active[4] = {0, 0, 0, 0}; 00183 __m128i one_mm = _mm_set1_epi32(1); 00184 00185 /* push root node task on stack */ 00186 task_stack[0].node = kernel_data.bvh.root; 00187 task_stack[0].index = 0; 00188 task_stack[0].num = numrays; 00189 task_stack[0].object = ~0; 00190 num_task = 1; 00191 00192 /* push all rays in first SIMD lane */ 00193 for(int i = 0; i < numrays; i++) 00194 active_ray_stacks[0][i] = i; 00195 num_active[0] = numrays; 00196 00197 while(num_task >= 1) { 00198 /* pop task */ 00199 MBVHTask task = task_stack[--num_task]; 00200 00201 if(task.node == MBVH_OBJECT_SENTINEL) { 00202 /* instance pop */ 00203 00204 /* pop rays from stack */ 00205 num_active[task.index] -= task.num; 00206 int ray_offset = num_active[task.index]; 00207 00208 /* transform rays */ 00209 for(int i = 0; i < task.num; i++) { 00210 MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]]; 00211 mbvh_instance_pop(kg, task.object, ray); 00212 } 00213 } 00214 else if(task.node >= 0) { 00215 /* inner node? */ 00216 00217 /* pop rays from stack*/ 00218 num_active[task.index] -= task.num; 00219 int ray_offset = num_active[task.index]; 00220 00221 /* initialze simd values */ 00222 __m128i num_active_mm = _mm_load_si128((__m128i*)num_active); 00223 __m128 len_mm = _mm_set_ps1(0.0f); 00224 00225 for(int i = 0; i < task.num; i++) { 00226 int rayid = active_ray_stacks[task.index][ray_offset + i]; 00227 MVBHRay *ray = rays + rayid; 00228 00229 /* intersect 4 QBVH node children */ 00230 __m128 result; 00231 __m128 thit; 00232 00233 mbvh_node_intersect(kg, &result, &thit, ray->P, ray->idir, ray->t, task.node); 00234 00235 /* update length for sorting */ 00236 len_mm = _mm_add_ps(len_mm, _mm_and_ps(thit, result)); 00237 00238 /* push rays on stack */ 00239 for(int j = 0; j < 4; j++) 00240 active_ray_stacks[j][num_active[j]] = rayid; 00241 00242 /* update num active */ 00243 __m128i resulti = _mm_and_si128(*((__m128i*)&result), one_mm); 00244 num_active_mm = _mm_add_epi32(resulti, num_active_mm); 00245 _mm_store_si128((__m128i*)num_active, num_active_mm); 00246 } 00247 00248 if(num_active[0] || num_active[1] || num_active[2] || num_active[3]) { 00249 /* load child node addresses */ 00250 float4 cnodes = kernel_tex_fetch(__bvh_nodes, task.node); 00251 int child[4] = { 00252 __float_as_int(cnodes.x), 00253 __float_as_int(cnodes.y), 00254 __float_as_int(cnodes.z), 00255 __float_as_int(cnodes.w)}; 00256 00257 /* sort nodes by average intersection distance */ 00258 int ids[4] = {0, 1, 2, 3}; 00259 float len[4]; 00260 00261 _mm_store_ps(len, len_mm); 00262 mbvh_sort_by_length(ids, len); 00263 00264 /* push new tasks on stack */ 00265 for(int j = 0; j < 4; j++) { 00266 if(num_active[j]) { 00267 int id = ids[j]; 00268 00269 task_stack[num_task].node = child[id]; 00270 task_stack[num_task].index = id; 00271 task_stack[num_task].num = num_active[id]; 00272 task_stack[num_task].object = task.object; 00273 num_task++; 00274 } 00275 } 00276 } 00277 } 00278 else { 00279 /* fetch leaf node data */ 00280 float4 leaf = kernel_tex_fetch(__bvh_nodes, (-task.node-1)*MBVH_NODE_SIZE+(MBVH_NODE_SIZE-2)); 00281 int triAddr = __float_as_int(leaf.x); 00282 int triAddr2 = __float_as_int(leaf.y); 00283 00284 /* pop rays from stack*/ 00285 num_active[task.index] -= task.num; 00286 int ray_offset = num_active[task.index]; 00287 00288 /* triangles */ 00289 if(triAddr >= 0) { 00290 int i, numq = (task.num >> 2) << 2; 00291 00292 /* SIMD ray leaf intersection */ 00293 for(i = 0; i < numq; i += 4) { 00294 MBVHRay *ray4[4] = { 00295 &rays[active_ray_stacks[task.index][ray_offset + i + 0]], 00296 &rays[active_ray_stacks[task.index][ray_offset + i + 1]], 00297 &rays[active_ray_stacks[task.index][ray_offset + i + 2]], 00298 &rays[active_ray_stacks[task.index][ray_offset + i + 3]]}; 00299 00300 /* load SoA */ 00301 00302 while(triAddr < triAddr2) { 00303 mbvh_triangle_intersect(ray4[0], task.object, task.node); 00304 mbvh_triangle_intersect(ray4[1], task.object, task.node); 00305 mbvh_triangle_intersect(ray4[2], task.object, task.node); 00306 mbvh_triangle_intersect(ray4[3], task.object, task.node); 00307 triAddr++; 00308 00309 /* some shadow ray optim could be done by setting t=0 */ 00310 } 00311 00312 /* store AoS */ 00313 } 00314 00315 /* mono ray leaf intersection */ 00316 for(; i < task.num; i++) { 00317 MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]]; 00318 00319 while(triAddr < triAddr2) { 00320 mbvh_triangle_intersect(kg, ray, task.object, task.node); 00321 triAddr++; 00322 } 00323 } 00324 } 00325 else { 00326 /* instance push */ 00327 int object = -triAddr-1; 00328 int node = triAddr; 00329 00330 /* push instance pop task */ 00331 task_stack[num_task].node = MBVH_OBJECT_SENTINEL; 00332 task_stack[num_task].index = task.index; 00333 task_stack[num_task].num = task.num; 00334 task_stack[num_task].object = object; 00335 num_task++; 00336 00337 num_active[task.index] += task.num; 00338 00339 /* push node task */ 00340 task_stack[num_task].node = node; 00341 task_stack[num_task].index = task.index; 00342 task_stack[num_task].num = task.num; 00343 task_stack[num_task].object = object; 00344 num_task++; 00345 00346 for(int i = 0; i < task.num; i++) { 00347 int rayid = active_ray_stacks[task.index][ray_offset + i]; 00348 00349 /* push on stack for last task */ 00350 active_ray_stacks[task.index][num_active[task.index]] = rayid; 00351 num_active[task.index]++; 00352 00353 /* transform ray */ 00354 MBVHRay *ray = &rays[rayid]; 00355 mbvh_instance_push(kg, object, ray); 00356 } 00357 } 00358 } 00359 } 00360 } 00361 00362 __device void mbvh_set_ray(MBVHRay *rays, int i, Ray *ray, float tmax) 00363 { 00364 MBVHRay *mray = &rays[i]; 00365 00366 /* ray parameters in registers */ 00367 mray->P = ray->P; 00368 mray->idir = mbvh_inverse_direction(ray->D); 00369 mray->t = tmax; 00370 } 00371 00372 __device bool mbvh_get_intersection(MVBHRay *rays, int i, Intersection *isect, float tmax) 00373 { 00374 MBVHRay *mray = &rays[i]; 00375 00376 if(mray->t == tmax) 00377 return false; 00378 00379 isect->t = mray->t; 00380 isect->u = mray->u; 00381 isect->v = mray->v; 00382 isect->index = mray->index; 00383 isect->object = mray->object; 00384 00385 return true; 00386 } 00387 00388 __device bool mbvh_get_shadow(MBVHRay *rays, int i, float tmax) 00389 { 00390 return (rays[i].t == tmax); 00391 } 00392 00393 CCL_NAMESPACE_END 00394