Blender V2.61 - r43446

kernel_mbvh.h

Go to the documentation of this file.
00001 /*
00002  * Copyright 2011, Blender Foundation.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License
00006  * as published by the Free Software Foundation; either version 2
00007  * of the License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software Foundation,
00016  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 CCL_NAMESPACE_BEGIN
00020 
00021 #define MBVH_OBJECT_SENTINEL 0x76543210
00022 #define MBVH_NODE_SIZE 8
00023 #define MBVH_STACK_SIZE 1024
00024 #define MBVH_RAY_STACK_SIZE 10000
00025 
00026 typedef struct MBVHTask {
00027     int node;
00028     int index;
00029     int num;
00030     int object;
00031 } MBVHTask;
00032 
00033 typedef struct MVBHRay {
00034     float3 P;
00035     float u;
00036     float3 idir;
00037     float v;
00038     float t;
00039     int index;
00040     int object;
00041 
00042     float3 origP;
00043     float3 origD;
00044     float tmax;
00045 } MBVHRay;
00046 
00047 __device float3 mbvh_inverse_direction(float3 dir)
00048 {
00049     // Avoid divide by zero (ooeps = exp2f(-80.0f))
00050     float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f;
00051     float3 idir;
00052 
00053     idir.x = 1.0f / (fabsf(dir.x) > ooeps ? dir.x : copysignf(ooeps, dir.x));
00054     idir.y = 1.0f / (fabsf(dir.y) > ooeps ? dir.y : copysignf(ooeps, dir.y));
00055     idir.z = 1.0f / (fabsf(dir.z) > ooeps ? dir.z : copysignf(ooeps, dir.z));
00056 
00057     return idir;
00058 }
00059 
00060 __device void mbvh_instance_push(KernelGlobals *kg, int object, MBVHRay *ray)
00061 {
00062     Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
00063 
00064     ray->P = transform(&tfm, ray->origP);
00065 
00066     float3 dir = ray->origD;
00067 
00068     if(ray->t != ray->tmax) dir *= ray->t;
00069 
00070     dir = transform_direction(&tfm, dir);
00071     ray->idir = mbvh_inverse_direction(normalize(dir));
00072 
00073     if(ray->t != ray->tmax) ray->t = len(dir);
00074 }
00075 
00076 __device void mbvh_instance_pop(KernelGlobals *kg, int object, MBVHRay *ray)
00077 {
00078     Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
00079 
00080     if(ray->t != ray->tmax)
00081         ray->t = len(transform_direction(&tfm, (1.0f/(ray->idir)) * (ray->t)));
00082 
00083     ray->P = ray->origP;
00084     ray->idir = mbvh_inverse_direction(ray->origD);
00085 }
00086 
00087 /* Sven Woop's algorithm */
00088 __device void mbvh_triangle_intersect(KernelGlobals *kg, MBVHRay *ray, int object, int triAddr)
00089 {
00090     float3 P = ray->P;
00091     float3 idir = ray->idir;
00092 
00093     /* compute and check intersection t-value */
00094     float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+0);
00095     float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+1);
00096     float3 dir = 1.0f/idir;
00097 
00098     float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
00099     float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
00100     float t = Oz * invDz;
00101 
00102     if(t > 0.0f && t < ray->t) {
00103         /* compute and check barycentric u */
00104         float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
00105         float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
00106         float u = Ox + t*Dx;
00107 
00108         if(u >= 0.0f) {
00109             /* compute and check barycentric v */
00110             float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*MBVH_NODE_SIZE+2);
00111             float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
00112             float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
00113             float v = Oy + t*Dy;
00114 
00115             if(v >= 0.0f && u + v <= 1.0f) {
00116                 /* record intersection */
00117                 ray->index = triAddr;
00118                 ray->object = object;
00119                 ray->u = u;
00120                 ray->v = v;
00121                 ray->t = t;
00122             }
00123         }
00124     }
00125 }
00126 
00127 __device void mbvh_node_intersect(KernelGlobals *kg, __m128 *traverseChild,
00128     __m128 *tHit, float3 P, float3 idir, float t, int nodeAddr)
00129 {
00130     /* X axis */
00131     const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+0);
00132     const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x));
00133     const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+1);
00134     const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, _mm_set_ps1(P.x)), _mm_set_ps1(idir.x));
00135 
00136     __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
00137     __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
00138 
00139     /* Y axis */
00140     const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+2);
00141     const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y));
00142     const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+3);
00143     const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, _mm_set_ps1(P.y)), _mm_set_ps1(idir.y));
00144 
00145     tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
00146     tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
00147 
00148     /* Z axis */
00149     const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+4);
00150     const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z));
00151     const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*MBVH_NODE_SIZE+5);
00152     const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, _mm_set_ps1(P.z)), _mm_set_ps1(idir.z));
00153 
00154     tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
00155     tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
00156 
00157     /* compare and get mask */
00158     *traverseChild = _mm_cmple_ps(tmin, tmax);
00159 
00160     /* get distance XXX probably wrong */
00161     *tHit = tmin;
00162 }
00163 
00164 static void mbvh_sort_by_length(int id[4], float len[4])
00165 {
00166     for(int i = 1; i < 4; i++) {
00167         int j = i - 1;
00168 
00169         while(j >= 0 && len[j] > len[j+1]) {
00170             swap(len[j], len[j+1]);
00171             swap(id[j], id[j+1]);
00172             j--;
00173         }
00174     }
00175 }
00176 
00177 __device void scene_intersect(KernelGlobals *kg, MBVHRay *rays, int numrays)
00178 {
00179     /* traversal stacks */
00180     MBVHTask task_stack[MBVH_STACK_SIZE];
00181     int active_ray_stacks[4][MBVH_RAY_STACK_SIZE];
00182     int num_task, num_active[4] = {0, 0, 0, 0};
00183     __m128i one_mm = _mm_set1_epi32(1);
00184 
00185     /* push root node task on stack */
00186     task_stack[0].node = kernel_data.bvh.root;
00187     task_stack[0].index = 0;
00188     task_stack[0].num = numrays;
00189     task_stack[0].object = ~0;
00190     num_task = 1;
00191 
00192     /* push all rays in first SIMD lane */
00193     for(int i = 0; i < numrays; i++)
00194         active_ray_stacks[0][i] = i;
00195     num_active[0] = numrays;
00196     
00197     while(num_task >= 1) {
00198         /* pop task */
00199         MBVHTask task = task_stack[--num_task];
00200 
00201         if(task.node == MBVH_OBJECT_SENTINEL) {
00202             /* instance pop */
00203 
00204             /* pop rays from stack */
00205             num_active[task.index] -= task.num;
00206             int ray_offset = num_active[task.index];
00207 
00208             /* transform rays */
00209             for(int i = 0; i < task.num; i++) {
00210                 MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]];
00211                 mbvh_instance_pop(kg, task.object, ray);
00212             }
00213         }
00214         else if(task.node >= 0) {
00215             /* inner node? */
00216 
00217             /* pop rays from stack*/
00218             num_active[task.index] -= task.num;
00219             int ray_offset = num_active[task.index];
00220 
00221             /* initialze simd values */
00222             __m128i num_active_mm = _mm_load_si128((__m128i*)num_active);
00223             __m128 len_mm = _mm_set_ps1(0.0f);
00224 
00225             for(int i = 0; i < task.num; i++) {
00226                 int rayid = active_ray_stacks[task.index][ray_offset + i];
00227                 MVBHRay *ray = rays + rayid;
00228 
00229                 /* intersect 4 QBVH node children */
00230                 __m128 result;
00231                 __m128 thit;
00232 
00233                 mbvh_node_intersect(kg, &result, &thit, ray->P, ray->idir, ray->t, task.node);
00234 
00235                 /* update length for sorting */
00236                 len_mm = _mm_add_ps(len_mm, _mm_and_ps(thit, result));
00237 
00238                 /* push rays on stack */
00239                 for(int j = 0; j < 4; j++)
00240                     active_ray_stacks[j][num_active[j]] = rayid;
00241 
00242                 /* update num active */
00243                 __m128i resulti = _mm_and_si128(*((__m128i*)&result), one_mm);
00244                 num_active_mm = _mm_add_epi32(resulti, num_active_mm);
00245                 _mm_store_si128((__m128i*)num_active, num_active_mm);
00246             }
00247 
00248             if(num_active[0] || num_active[1] || num_active[2] || num_active[3]) {
00249                 /* load child node addresses */
00250                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, task.node);
00251                 int child[4] = {
00252                     __float_as_int(cnodes.x),
00253                     __float_as_int(cnodes.y),
00254                     __float_as_int(cnodes.z),
00255                     __float_as_int(cnodes.w)};
00256 
00257                 /* sort nodes by average intersection distance */
00258                 int ids[4] = {0, 1, 2, 3};
00259                 float len[4];
00260 
00261                 _mm_store_ps(len, len_mm);
00262                 mbvh_sort_by_length(ids, len);
00263 
00264                 /* push new tasks on stack */
00265                 for(int j = 0; j < 4; j++) {
00266                     if(num_active[j]) {
00267                         int id = ids[j];
00268 
00269                         task_stack[num_task].node = child[id];
00270                         task_stack[num_task].index = id;
00271                         task_stack[num_task].num = num_active[id];
00272                         task_stack[num_task].object = task.object;
00273                         num_task++;
00274                     }
00275                 }
00276             }
00277         }
00278         else {
00279             /* fetch leaf node data */
00280             float4 leaf = kernel_tex_fetch(__bvh_nodes, (-task.node-1)*MBVH_NODE_SIZE+(MBVH_NODE_SIZE-2));
00281             int triAddr = __float_as_int(leaf.x);
00282             int triAddr2 = __float_as_int(leaf.y);
00283 
00284             /* pop rays from stack*/
00285             num_active[task.index] -= task.num;
00286             int ray_offset = num_active[task.index];
00287 
00288             /* triangles */
00289             if(triAddr >= 0) {
00290                 int i, numq = (task.num >> 2) << 2;
00291 
00292                 /* SIMD ray leaf intersection */
00293                 for(i = 0; i < numq; i += 4) {
00294                     MBVHRay *ray4[4] = {
00295                         &rays[active_ray_stacks[task.index][ray_offset + i + 0]],
00296                         &rays[active_ray_stacks[task.index][ray_offset + i + 1]],
00297                         &rays[active_ray_stacks[task.index][ray_offset + i + 2]],
00298                         &rays[active_ray_stacks[task.index][ray_offset + i + 3]]};
00299 
00300                     /* load SoA */
00301 
00302                     while(triAddr < triAddr2) {
00303                         mbvh_triangle_intersect(ray4[0], task.object, task.node);
00304                         mbvh_triangle_intersect(ray4[1], task.object, task.node);
00305                         mbvh_triangle_intersect(ray4[2], task.object, task.node);
00306                         mbvh_triangle_intersect(ray4[3], task.object, task.node);
00307                         triAddr++;
00308 
00309                         /* some shadow ray optim could be done by setting t=0 */
00310                     }
00311 
00312                     /* store AoS */
00313                 }
00314 
00315                 /* mono ray leaf intersection */
00316                 for(; i < task.num; i++) {
00317                     MBVHRay *ray = &rays[active_ray_stacks[task.index][ray_offset + i]];
00318 
00319                     while(triAddr < triAddr2) {
00320                         mbvh_triangle_intersect(kg, ray, task.object, task.node);
00321                         triAddr++;
00322                     }
00323                 }
00324             }
00325             else {
00326                 /* instance push */
00327                 int object = -triAddr-1;
00328                 int node = triAddr;
00329 
00330                 /* push instance pop task */
00331                 task_stack[num_task].node = MBVH_OBJECT_SENTINEL;
00332                 task_stack[num_task].index = task.index;
00333                 task_stack[num_task].num = task.num;
00334                 task_stack[num_task].object = object;
00335                 num_task++;
00336 
00337                 num_active[task.index] += task.num;
00338 
00339                 /* push node task */
00340                 task_stack[num_task].node = node;
00341                 task_stack[num_task].index = task.index;
00342                 task_stack[num_task].num = task.num;
00343                 task_stack[num_task].object = object;
00344                 num_task++;
00345 
00346                 for(int i = 0; i < task.num; i++) {
00347                     int rayid = active_ray_stacks[task.index][ray_offset + i];
00348 
00349                     /* push on stack for last task */
00350                     active_ray_stacks[task.index][num_active[task.index]] = rayid;
00351                     num_active[task.index]++;
00352 
00353                     /* transform ray */
00354                     MBVHRay *ray = &rays[rayid];
00355                     mbvh_instance_push(kg, object, ray);
00356                 }
00357             }
00358         }
00359     }
00360 }
00361 
00362 __device void mbvh_set_ray(MBVHRay *rays, int i, Ray *ray, float tmax)
00363 {
00364     MBVHRay *mray = &rays[i];
00365 
00366     /* ray parameters in registers */
00367     mray->P = ray->P;
00368     mray->idir = mbvh_inverse_direction(ray->D);
00369     mray->t = tmax;
00370 }
00371 
00372 __device bool mbvh_get_intersection(MVBHRay *rays, int i, Intersection *isect, float tmax)
00373 {
00374     MBVHRay *mray = &rays[i];
00375 
00376     if(mray->t == tmax)
00377         return false;
00378     
00379     isect->t = mray->t;
00380     isect->u = mray->u;
00381     isect->v = mray->v;
00382     isect->index = mray->index;
00383     isect->object = mray->object;
00384 
00385     return true;
00386 }
00387 
00388 __device bool mbvh_get_shadow(MBVHRay *rays, int i, float tmax)
00389 {
00390     return (rays[i].t == tmax);
00391 }
00392 
00393 CCL_NAMESPACE_END
00394