Blender V2.61 - r43446

device_cpu.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright 2011, Blender Foundation.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License
00006  * as published by the Free Software Foundation; either version 2
00007  * of the License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software Foundation,
00016  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 #include <stdlib.h>
00020 #include <string.h>
00021 
00022 #include "device.h"
00023 #include "device_intern.h"
00024 
00025 #include "kernel.h"
00026 #include "kernel_types.h"
00027 
00028 #include "osl_shader.h"
00029 
00030 #include "util_debug.h"
00031 #include "util_foreach.h"
00032 #include "util_function.h"
00033 #include "util_opengl.h"
00034 #include "util_progress.h"
00035 #include "util_system.h"
00036 #include "util_thread.h"
00037 
00038 CCL_NAMESPACE_BEGIN
00039 
00040 class CPUDevice : public Device
00041 {
00042 public:
00043     vector<thread*> threads;
00044     ThreadQueue<DeviceTask> tasks;
00045     KernelGlobals *kg;
00046     
00047     CPUDevice(int threads_num)
00048     {
00049         kg = kernel_globals_create();
00050 
00051         /* do now to avoid thread issues */
00052         system_cpu_support_optimized();
00053 
00054         if(threads_num == 0)
00055             threads_num = system_cpu_thread_count();
00056 
00057         threads.resize(threads_num);
00058 
00059         for(size_t i = 0; i < threads.size(); i++)
00060             threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i));
00061     }
00062 
00063     ~CPUDevice()
00064     {
00065         tasks.stop();
00066 
00067         foreach(thread *t, threads) {
00068             t->join();
00069             delete t;
00070         }
00071 
00072         kernel_globals_free(kg);
00073     }
00074 
00075     bool support_full_kernel()
00076     {
00077         return true;
00078     }
00079 
00080     string description()
00081     {
00082         return system_cpu_brand_string();
00083     }
00084 
00085     void mem_alloc(device_memory& mem, MemoryType type)
00086     {
00087         mem.device_pointer = mem.data_pointer;
00088     }
00089 
00090     void mem_copy_to(device_memory& mem)
00091     {
00092         /* no-op */
00093     }
00094 
00095     void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
00096     {
00097         /* no-op */
00098     }
00099 
00100     void mem_zero(device_memory& mem)
00101     {
00102         memset((void*)mem.device_pointer, 0, mem.memory_size());
00103     }
00104 
00105     void mem_free(device_memory& mem)
00106     {
00107         mem.device_pointer = 0;
00108     }
00109 
00110     void const_copy_to(const char *name, void *host, size_t size)
00111     {
00112         kernel_const_copy(kg, name, host, size);
00113     }
00114 
00115     void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
00116     {
00117         kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
00118         mem.device_pointer = mem.data_pointer;
00119     }
00120 
00121     void tex_free(device_memory& mem)
00122     {
00123         mem.device_pointer = 0;
00124     }
00125 
00126     void *osl_memory()
00127     {
00128 #ifdef WITH_OSL
00129         return kernel_osl_memory(kg);
00130 #else
00131         return NULL;
00132 #endif
00133     }
00134 
00135     void thread_run(int t)
00136     {
00137         DeviceTask task;
00138 
00139         while(tasks.worker_wait_pop(task)) {
00140             if(task.type == DeviceTask::PATH_TRACE)
00141                 thread_path_trace(task);
00142             else if(task.type == DeviceTask::TONEMAP)
00143                 thread_tonemap(task);
00144             else if(task.type == DeviceTask::SHADER)
00145                 thread_shader(task);
00146 
00147             tasks.worker_done();
00148         }
00149     }
00150 
00151     void thread_path_trace(DeviceTask& task)
00152     {
00153         if(tasks.worker_cancel())
00154             return;
00155 
00156 #ifdef WITH_OSL
00157         if(kernel_osl_use(kg))
00158             OSLShader::thread_init(kg);
00159 #endif
00160 
00161 #ifdef WITH_OPTIMIZED_KERNEL
00162         if(system_cpu_support_optimized()) {
00163             for(int y = task.y; y < task.y + task.h; y++) {
00164                 for(int x = task.x; x < task.x + task.w; x++)
00165                     kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state,
00166                         task.sample, x, y, task.offset, task.stride);
00167 
00168                 if(tasks.worker_cancel())
00169                     break;
00170             }
00171         }
00172         else
00173 #endif
00174         {
00175             for(int y = task.y; y < task.y + task.h; y++) {
00176                 for(int x = task.x; x < task.x + task.w; x++)
00177                     kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state,
00178                         task.sample, x, y, task.offset, task.stride);
00179 
00180                 if(tasks.worker_cancel())
00181                     break;
00182             }
00183         }
00184 
00185 #ifdef WITH_OSL
00186         if(kernel_osl_use(kg))
00187             OSLShader::thread_free(kg);
00188 #endif
00189     }
00190 
00191     void thread_tonemap(DeviceTask& task)
00192     {
00193 #ifdef WITH_OPTIMIZED_KERNEL
00194         if(system_cpu_support_optimized()) {
00195             for(int y = task.y; y < task.y + task.h; y++)
00196                 for(int x = task.x; x < task.x + task.w; x++)
00197                     kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer,
00198                         task.sample, task.resolution, x, y, task.offset, task.stride);
00199         }
00200         else
00201 #endif
00202         {
00203             for(int y = task.y; y < task.y + task.h; y++)
00204                 for(int x = task.x; x < task.x + task.w; x++)
00205                     kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer,
00206                         task.sample, task.resolution, x, y, task.offset, task.stride);
00207         }
00208     }
00209 
00210     void thread_shader(DeviceTask& task)
00211     {
00212 #ifdef WITH_OSL
00213         if(kernel_osl_use(kg))
00214             OSLShader::thread_init(kg);
00215 #endif
00216 
00217 #ifdef WITH_OPTIMIZED_KERNEL
00218         if(system_cpu_support_optimized()) {
00219             for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
00220                 kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
00221 
00222                 if(tasks.worker_cancel())
00223                     break;
00224             }
00225         }
00226         else
00227 #endif
00228         {
00229             for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
00230                 kernel_cpu_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x);
00231 
00232                 if(tasks.worker_cancel())
00233                     break;
00234             }
00235         }
00236 
00237 #ifdef WITH_OSL
00238         if(kernel_osl_use(kg))
00239             OSLShader::thread_free(kg);
00240 #endif
00241     }
00242 
00243     void task_add(DeviceTask& task)
00244     {
00245         /* split task into smaller ones, more than number of threads for uneven
00246            workloads where some parts of the image render slower than others */
00247         task.split(tasks, threads.size()*10);
00248     }
00249 
00250     void task_wait()
00251     {
00252         tasks.wait_done();
00253     }
00254 
00255     void task_cancel()
00256     {
00257         tasks.cancel();
00258     }
00259 };
00260 
00261 Device *device_cpu_create(DeviceInfo& info, int threads)
00262 {
00263     return new CPUDevice(threads);
00264 }
00265 
00266 void device_cpu_info(vector<DeviceInfo>& devices)
00267 {
00268     DeviceInfo info;
00269 
00270     info.type = DEVICE_CPU;
00271     info.description = system_cpu_brand_string();
00272     info.id = "CPU";
00273     info.num = 0;
00274 
00275     devices.insert(devices.begin(), info);
00276 }
00277 
00278 CCL_NAMESPACE_END
00279