Blender V2.61 - r43446
|
00001 /* 00002 * Copyright 2011, Blender Foundation. 00003 * 00004 * This program is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU General Public License 00006 * as published by the Free Software Foundation; either version 2 00007 * of the License, or (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program; if not, write to the Free Software Foundation, 00016 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00019 #include <stdlib.h> 00020 #include <string.h> 00021 00022 #include "device.h" 00023 #include "device_intern.h" 00024 00025 #include "kernel.h" 00026 #include "kernel_types.h" 00027 00028 #include "osl_shader.h" 00029 00030 #include "util_debug.h" 00031 #include "util_foreach.h" 00032 #include "util_function.h" 00033 #include "util_opengl.h" 00034 #include "util_progress.h" 00035 #include "util_system.h" 00036 #include "util_thread.h" 00037 00038 CCL_NAMESPACE_BEGIN 00039 00040 class CPUDevice : public Device 00041 { 00042 public: 00043 vector<thread*> threads; 00044 ThreadQueue<DeviceTask> tasks; 00045 KernelGlobals *kg; 00046 00047 CPUDevice(int threads_num) 00048 { 00049 kg = kernel_globals_create(); 00050 00051 /* do now to avoid thread issues */ 00052 system_cpu_support_optimized(); 00053 00054 if(threads_num == 0) 00055 threads_num = system_cpu_thread_count(); 00056 00057 threads.resize(threads_num); 00058 00059 for(size_t i = 0; i < threads.size(); i++) 00060 threads[i] = new thread(function_bind(&CPUDevice::thread_run, this, i)); 00061 } 00062 00063 ~CPUDevice() 00064 { 00065 tasks.stop(); 00066 00067 foreach(thread *t, threads) { 00068 t->join(); 00069 delete t; 00070 } 00071 00072 kernel_globals_free(kg); 00073 } 00074 00075 bool support_full_kernel() 00076 { 00077 return true; 00078 } 00079 00080 string description() 00081 { 00082 return system_cpu_brand_string(); 00083 } 00084 00085 void mem_alloc(device_memory& mem, MemoryType type) 00086 { 00087 mem.device_pointer = mem.data_pointer; 00088 } 00089 00090 void mem_copy_to(device_memory& mem) 00091 { 00092 /* no-op */ 00093 } 00094 00095 void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) 00096 { 00097 /* no-op */ 00098 } 00099 00100 void mem_zero(device_memory& mem) 00101 { 00102 memset((void*)mem.device_pointer, 0, mem.memory_size()); 00103 } 00104 00105 void mem_free(device_memory& mem) 00106 { 00107 mem.device_pointer = 0; 00108 } 00109 00110 void const_copy_to(const char *name, void *host, size_t size) 00111 { 00112 kernel_const_copy(kg, name, host, size); 00113 } 00114 00115 void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) 00116 { 00117 kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height); 00118 mem.device_pointer = mem.data_pointer; 00119 } 00120 00121 void tex_free(device_memory& mem) 00122 { 00123 mem.device_pointer = 0; 00124 } 00125 00126 void *osl_memory() 00127 { 00128 #ifdef WITH_OSL 00129 return kernel_osl_memory(kg); 00130 #else 00131 return NULL; 00132 #endif 00133 } 00134 00135 void thread_run(int t) 00136 { 00137 DeviceTask task; 00138 00139 while(tasks.worker_wait_pop(task)) { 00140 if(task.type == DeviceTask::PATH_TRACE) 00141 thread_path_trace(task); 00142 else if(task.type == DeviceTask::TONEMAP) 00143 thread_tonemap(task); 00144 else if(task.type == DeviceTask::SHADER) 00145 thread_shader(task); 00146 00147 tasks.worker_done(); 00148 } 00149 } 00150 00151 void thread_path_trace(DeviceTask& task) 00152 { 00153 if(tasks.worker_cancel()) 00154 return; 00155 00156 #ifdef WITH_OSL 00157 if(kernel_osl_use(kg)) 00158 OSLShader::thread_init(kg); 00159 #endif 00160 00161 #ifdef WITH_OPTIMIZED_KERNEL 00162 if(system_cpu_support_optimized()) { 00163 for(int y = task.y; y < task.y + task.h; y++) { 00164 for(int x = task.x; x < task.x + task.w; x++) 00165 kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, 00166 task.sample, x, y, task.offset, task.stride); 00167 00168 if(tasks.worker_cancel()) 00169 break; 00170 } 00171 } 00172 else 00173 #endif 00174 { 00175 for(int y = task.y; y < task.y + task.h; y++) { 00176 for(int x = task.x; x < task.x + task.w; x++) 00177 kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, 00178 task.sample, x, y, task.offset, task.stride); 00179 00180 if(tasks.worker_cancel()) 00181 break; 00182 } 00183 } 00184 00185 #ifdef WITH_OSL 00186 if(kernel_osl_use(kg)) 00187 OSLShader::thread_free(kg); 00188 #endif 00189 } 00190 00191 void thread_tonemap(DeviceTask& task) 00192 { 00193 #ifdef WITH_OPTIMIZED_KERNEL 00194 if(system_cpu_support_optimized()) { 00195 for(int y = task.y; y < task.y + task.h; y++) 00196 for(int x = task.x; x < task.x + task.w; x++) 00197 kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, 00198 task.sample, task.resolution, x, y, task.offset, task.stride); 00199 } 00200 else 00201 #endif 00202 { 00203 for(int y = task.y; y < task.y + task.h; y++) 00204 for(int x = task.x; x < task.x + task.w; x++) 00205 kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, 00206 task.sample, task.resolution, x, y, task.offset, task.stride); 00207 } 00208 } 00209 00210 void thread_shader(DeviceTask& task) 00211 { 00212 #ifdef WITH_OSL 00213 if(kernel_osl_use(kg)) 00214 OSLShader::thread_init(kg); 00215 #endif 00216 00217 #ifdef WITH_OPTIMIZED_KERNEL 00218 if(system_cpu_support_optimized()) { 00219 for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { 00220 kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x); 00221 00222 if(tasks.worker_cancel()) 00223 break; 00224 } 00225 } 00226 else 00227 #endif 00228 { 00229 for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { 00230 kernel_cpu_shader(kg, (uint4*)task.shader_input, (float3*)task.shader_output, task.shader_eval_type, x); 00231 00232 if(tasks.worker_cancel()) 00233 break; 00234 } 00235 } 00236 00237 #ifdef WITH_OSL 00238 if(kernel_osl_use(kg)) 00239 OSLShader::thread_free(kg); 00240 #endif 00241 } 00242 00243 void task_add(DeviceTask& task) 00244 { 00245 /* split task into smaller ones, more than number of threads for uneven 00246 workloads where some parts of the image render slower than others */ 00247 task.split(tasks, threads.size()*10); 00248 } 00249 00250 void task_wait() 00251 { 00252 tasks.wait_done(); 00253 } 00254 00255 void task_cancel() 00256 { 00257 tasks.cancel(); 00258 } 00259 }; 00260 00261 Device *device_cpu_create(DeviceInfo& info, int threads) 00262 { 00263 return new CPUDevice(threads); 00264 } 00265 00266 void device_cpu_info(vector<DeviceInfo>& devices) 00267 { 00268 DeviceInfo info; 00269 00270 info.type = DEVICE_CPU; 00271 info.description = system_cpu_brand_string(); 00272 info.id = "CPU"; 00273 info.num = 0; 00274 00275 devices.insert(devices.begin(), info); 00276 } 00277 00278 CCL_NAMESPACE_END 00279