Blender V2.61 - r43446
|
00001 /* 00002 * Copyright 2011, Blender Foundation. 00003 * 00004 * This program is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU General Public License 00006 * as published by the Free Software Foundation; either version 2 00007 * of the License, or (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program; if not, write to the Free Software Foundation, 00016 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00017 */ 00018 00019 #ifdef WITH_OPENCL 00020 00021 #include <stdio.h> 00022 #include <stdlib.h> 00023 #include <string.h> 00024 00025 #include "device.h" 00026 #include "device_intern.h" 00027 00028 #include "util_foreach.h" 00029 #include "util_map.h" 00030 #include "util_math.h" 00031 #include "util_md5.h" 00032 #include "util_opencl.h" 00033 #include "util_opengl.h" 00034 #include "util_path.h" 00035 #include "util_time.h" 00036 00037 CCL_NAMESPACE_BEGIN 00038 00039 #define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p)) 00040 00041 class OpenCLDevice : public Device 00042 { 00043 public: 00044 cl_context cxContext; 00045 cl_command_queue cqCommandQueue; 00046 cl_platform_id cpPlatform; 00047 cl_device_id cdDevice; 00048 cl_program cpProgram; 00049 cl_kernel ckPathTraceKernel; 00050 cl_kernel ckFilmConvertKernel; 00051 cl_int ciErr; 00052 map<string, device_vector<uchar>*> const_mem_map; 00053 map<string, device_memory*> mem_map; 00054 device_ptr null_mem; 00055 bool device_initialized; 00056 string platform_name; 00057 00058 const char *opencl_error_string(cl_int err) 00059 { 00060 switch (err) { 00061 case CL_SUCCESS: return "Success!"; 00062 case CL_DEVICE_NOT_FOUND: return "Device not found."; 00063 case CL_DEVICE_NOT_AVAILABLE: return "Device not available"; 00064 case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available"; 00065 case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure"; 00066 case CL_OUT_OF_RESOURCES: return "Out of resources"; 00067 case CL_OUT_OF_HOST_MEMORY: return "Out of host memory"; 00068 case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available"; 00069 case CL_MEM_COPY_OVERLAP: return "Memory copy overlap"; 00070 case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch"; 00071 case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported"; 00072 case CL_BUILD_PROGRAM_FAILURE: return "Program build failure"; 00073 case CL_MAP_FAILURE: return "Map failure"; 00074 case CL_INVALID_VALUE: return "Invalid value"; 00075 case CL_INVALID_DEVICE_TYPE: return "Invalid device type"; 00076 case CL_INVALID_PLATFORM: return "Invalid platform"; 00077 case CL_INVALID_DEVICE: return "Invalid device"; 00078 case CL_INVALID_CONTEXT: return "Invalid context"; 00079 case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties"; 00080 case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue"; 00081 case CL_INVALID_HOST_PTR: return "Invalid host pointer"; 00082 case CL_INVALID_MEM_OBJECT: return "Invalid memory object"; 00083 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor"; 00084 case CL_INVALID_IMAGE_SIZE: return "Invalid image size"; 00085 case CL_INVALID_SAMPLER: return "Invalid sampler"; 00086 case CL_INVALID_BINARY: return "Invalid binary"; 00087 case CL_INVALID_BUILD_OPTIONS: return "Invalid build options"; 00088 case CL_INVALID_PROGRAM: return "Invalid program"; 00089 case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable"; 00090 case CL_INVALID_KERNEL_NAME: return "Invalid kernel name"; 00091 case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition"; 00092 case CL_INVALID_KERNEL: return "Invalid kernel"; 00093 case CL_INVALID_ARG_INDEX: return "Invalid argument index"; 00094 case CL_INVALID_ARG_VALUE: return "Invalid argument value"; 00095 case CL_INVALID_ARG_SIZE: return "Invalid argument size"; 00096 case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments"; 00097 case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension"; 00098 case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size"; 00099 case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size"; 00100 case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset"; 00101 case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list"; 00102 case CL_INVALID_EVENT: return "Invalid event"; 00103 case CL_INVALID_OPERATION: return "Invalid operation"; 00104 case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object"; 00105 case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size"; 00106 case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level"; 00107 default: return "Unknown"; 00108 } 00109 } 00110 00111 bool opencl_error(cl_int err) 00112 { 00113 if(err != CL_SUCCESS) { 00114 string message = string_printf("OpenCL error (%d): %s", err, opencl_error_string(err)); 00115 if(error_msg == "") 00116 error_msg = message; 00117 fprintf(stderr, "%s\n", message.c_str()); 00118 return true; 00119 } 00120 00121 return false; 00122 } 00123 00124 void opencl_error(const string& message) 00125 { 00126 if(error_msg == "") 00127 error_msg = message; 00128 fprintf(stderr, "%s\n", message.c_str()); 00129 } 00130 00131 void opencl_assert(cl_int err) 00132 { 00133 if(err != CL_SUCCESS) { 00134 string message = string_printf("OpenCL error (%d): %s", err, opencl_error_string(err)); 00135 if(error_msg == "") 00136 error_msg = message; 00137 fprintf(stderr, "%s\n", message.c_str()); 00138 #ifndef NDEBUG 00139 abort(); 00140 #endif 00141 } 00142 } 00143 00144 OpenCLDevice(DeviceInfo& info, bool background_) 00145 { 00146 background = background_; 00147 cpPlatform = NULL; 00148 cxContext = NULL; 00149 cqCommandQueue = NULL; 00150 cpProgram = NULL; 00151 ckPathTraceKernel = NULL; 00152 ckFilmConvertKernel = NULL; 00153 null_mem = 0; 00154 device_initialized = false; 00155 00156 /* setup platform */ 00157 cl_uint num_platforms; 00158 00159 ciErr = clGetPlatformIDs(0, NULL, &num_platforms); 00160 if(opencl_error(ciErr)) 00161 return; 00162 00163 if(num_platforms == 0) { 00164 opencl_error("OpenCL: no platforms found."); 00165 return; 00166 } 00167 00168 ciErr = clGetPlatformIDs(num_platforms, &cpPlatform, NULL); 00169 if(opencl_error(ciErr)) 00170 return; 00171 00172 char name[256]; 00173 clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL); 00174 platform_name = name; 00175 00176 /* get devices */ 00177 vector<cl_device_id> device_ids; 00178 cl_uint num_devices; 00179 00180 if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices))) 00181 return; 00182 00183 if(info.num > num_devices) { 00184 if(num_devices == 0) 00185 opencl_error("OpenCL: no devices found."); 00186 else 00187 opencl_error("OpenCL: specified device not found."); 00188 return; 00189 } 00190 00191 device_ids.resize(num_devices); 00192 00193 if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL))) 00194 return; 00195 00196 cdDevice = device_ids[info.num]; 00197 00198 /* create context */ 00199 cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr); 00200 if(opencl_error(ciErr)) 00201 return; 00202 00203 cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr); 00204 if(opencl_error(ciErr)) 00205 return; 00206 00207 null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr); 00208 device_initialized = true; 00209 } 00210 00211 bool opencl_version_check() 00212 { 00213 char version[256]; 00214 00215 int major, minor, req_major = 1, req_minor = 1; 00216 00217 clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); 00218 00219 if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) { 00220 opencl_error(string_printf("OpenCL: failed to parse platform version string (%s).", version)); 00221 return false; 00222 } 00223 00224 if(!((major == req_major && minor >= req_minor) || (major > req_major))) { 00225 opencl_error(string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor)); 00226 return false; 00227 } 00228 00229 clGetDeviceInfo(cdDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL); 00230 00231 if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) { 00232 opencl_error(string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version)); 00233 return false; 00234 } 00235 00236 if(!((major == req_major && minor >= req_minor) || (major > req_major))) { 00237 opencl_error(string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor)); 00238 return false; 00239 } 00240 00241 /* we don't check CL_DEVICE_VERSION since for e.g. nvidia sm 1.3 cards this is 00242 1.0 even if the language features are there, just limited shared memory */ 00243 00244 return true; 00245 } 00246 00247 bool load_binary(const string& kernel_path, const string& clbin) 00248 { 00249 /* read binary into memory */ 00250 vector<uint8_t> binary; 00251 00252 if(!path_read_binary(clbin, binary)) { 00253 opencl_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str())); 00254 return false; 00255 } 00256 00257 /* create program */ 00258 cl_int status; 00259 size_t size = binary.size(); 00260 const uint8_t *bytes = &binary[0]; 00261 00262 cpProgram = clCreateProgramWithBinary(cxContext, 1, &cdDevice, 00263 &size, &bytes, &status, &ciErr); 00264 00265 if(opencl_error(status) || opencl_error(ciErr)) { 00266 opencl_error(string_printf("OpenCL failed create program from cached binary %s.", clbin.c_str())); 00267 return false; 00268 } 00269 00270 if(!build_kernel(kernel_path)) 00271 return false; 00272 00273 return true; 00274 } 00275 00276 bool save_binary(const string& clbin) 00277 { 00278 size_t size = 0; 00279 clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); 00280 00281 if(!size) 00282 return false; 00283 00284 vector<uint8_t> binary(size); 00285 uint8_t *bytes = &binary[0]; 00286 00287 clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL); 00288 00289 if(!path_write_binary(clbin, binary)) { 00290 opencl_error(string_printf("OpenCL failed to write cached binary %s.", clbin.c_str())); 00291 return false; 00292 } 00293 00294 return true; 00295 } 00296 00297 string kernel_build_options() 00298 { 00299 string build_options = " -cl-fast-relaxed-math "; 00300 00301 /* full shading only on NVIDIA cards at the moment */ 00302 if(platform_name == "NVIDIA CUDA") 00303 build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; 00304 if(platform_name == "Apple") 00305 build_options += " -D__CL_NO_FLOAT3__ "; 00306 00307 return build_options; 00308 } 00309 00310 bool build_kernel(const string& kernel_path) 00311 { 00312 string build_options = kernel_build_options(); 00313 00314 ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL); 00315 00316 if(ciErr != CL_SUCCESS) { 00317 /* show build errors */ 00318 char *build_log; 00319 size_t ret_val_size; 00320 00321 clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); 00322 00323 build_log = new char[ret_val_size+1]; 00324 clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); 00325 00326 build_log[ret_val_size] = '\0'; 00327 opencl_error("OpenCL build failed: errors in console"); 00328 fprintf(stderr, "%s\n", build_log); 00329 00330 delete[] build_log; 00331 00332 return false; 00333 } 00334 00335 return true; 00336 } 00337 00338 bool compile_kernel(const string& kernel_path, const string& kernel_md5) 00339 { 00340 /* we compile kernels consisting of many files. unfortunately opencl 00341 kernel caches do not seem to recognize changes in included files. 00342 so we force recompile on changes by adding the md5 hash of all files */ 00343 string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n"; 00344 source = path_source_replace_includes(source, kernel_path); 00345 00346 size_t source_len = source.size(); 00347 const char *source_str = source.c_str(); 00348 00349 cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr); 00350 00351 if(opencl_error(ciErr)) 00352 return false; 00353 00354 double starttime = time_dt(); 00355 printf("Compiling OpenCL kernel ...\n"); 00356 00357 if(!build_kernel(kernel_path)) 00358 return false; 00359 00360 printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); 00361 00362 return true; 00363 } 00364 00365 string device_md5_hash() 00366 { 00367 MD5Hash md5; 00368 char version[256], driver[256], name[256], vendor[256]; 00369 00370 clGetPlatformInfo(cpPlatform, CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL); 00371 clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL); 00372 clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL); 00373 clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION, sizeof(driver), &driver, NULL); 00374 00375 md5.append((uint8_t*)vendor, strlen(vendor)); 00376 md5.append((uint8_t*)version, strlen(version)); 00377 md5.append((uint8_t*)name, strlen(name)); 00378 md5.append((uint8_t*)driver, strlen(driver)); 00379 00380 string options = kernel_build_options(); 00381 md5.append((uint8_t*)options.c_str(), options.size()); 00382 00383 return md5.get_hex(); 00384 } 00385 00386 bool load_kernels(bool experimental) 00387 { 00388 /* verify if device was initialized */ 00389 if(!device_initialized) { 00390 fprintf(stderr, "OpenCL: failed to initialize device.\n"); 00391 return false; 00392 } 00393 00394 /* verify we have right opencl version */ 00395 if(!opencl_version_check()) 00396 return false; 00397 00398 /* md5 hash to detect changes */ 00399 string kernel_path = path_get("kernel"); 00400 string kernel_md5 = path_files_md5_hash(kernel_path); 00401 string device_md5 = device_md5_hash(); 00402 00403 /* try to use cache binary */ 00404 string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());; 00405 clbin = path_user_get(path_join("cache", clbin)); 00406 00407 if(path_exists(clbin)) { 00408 /* if exists already, try use it */ 00409 if(!load_binary(kernel_path, clbin)) 00410 return false; 00411 } 00412 else { 00413 /* compile kernel */ 00414 if(!compile_kernel(kernel_path, kernel_md5)) 00415 return false; 00416 00417 /* save binary for reuse */ 00418 save_binary(clbin); 00419 } 00420 00421 /* find kernels */ 00422 ckPathTraceKernel = clCreateKernel(cpProgram, "kernel_ocl_path_trace", &ciErr); 00423 if(opencl_error(ciErr)) 00424 return false; 00425 00426 ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr); 00427 if(opencl_error(ciErr)) 00428 return false; 00429 00430 return true; 00431 } 00432 00433 ~OpenCLDevice() 00434 { 00435 if(null_mem) 00436 clReleaseMemObject(CL_MEM_PTR(null_mem)); 00437 00438 map<string, device_vector<uchar>*>::iterator mt; 00439 for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { 00440 mem_free(*(mt->second)); 00441 delete mt->second; 00442 } 00443 00444 if(ckPathTraceKernel) 00445 clReleaseKernel(ckPathTraceKernel); 00446 if(ckFilmConvertKernel) 00447 clReleaseKernel(ckFilmConvertKernel); 00448 if(cpProgram) 00449 clReleaseProgram(cpProgram); 00450 if(cqCommandQueue) 00451 clReleaseCommandQueue(cqCommandQueue); 00452 if(cxContext) 00453 clReleaseContext(cxContext); 00454 } 00455 00456 bool support_full_kernel() 00457 { 00458 return false; 00459 } 00460 00461 string description() 00462 { 00463 char name[1024]; 00464 00465 clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL); 00466 00467 return string("OpenCL ") + name; 00468 } 00469 00470 void mem_alloc(device_memory& mem, MemoryType type) 00471 { 00472 size_t size = mem.memory_size(); 00473 00474 if(type == MEM_READ_ONLY) 00475 mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, size, NULL, &ciErr); 00476 else if(type == MEM_WRITE_ONLY) 00477 mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_WRITE_ONLY, size, NULL, &ciErr); 00478 else 00479 mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_WRITE, size, NULL, &ciErr); 00480 00481 opencl_assert(ciErr); 00482 } 00483 00484 void mem_copy_to(device_memory& mem) 00485 { 00486 /* this is blocking */ 00487 size_t size = mem.memory_size(); 00488 ciErr = clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL); 00489 opencl_assert(ciErr); 00490 } 00491 00492 void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) 00493 { 00494 size_t offset = elem*y*w; 00495 size_t size = elem*w*h; 00496 00497 ciErr = clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL); 00498 opencl_assert(ciErr); 00499 } 00500 00501 void mem_zero(device_memory& mem) 00502 { 00503 if(mem.device_pointer) { 00504 memset((void*)mem.data_pointer, 0, mem.memory_size()); 00505 mem_copy_to(mem); 00506 } 00507 } 00508 00509 void mem_free(device_memory& mem) 00510 { 00511 if(mem.device_pointer) { 00512 ciErr = clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)); 00513 mem.device_pointer = 0; 00514 opencl_assert(ciErr); 00515 } 00516 } 00517 00518 void const_copy_to(const char *name, void *host, size_t size) 00519 { 00520 if(const_mem_map.find(name) == const_mem_map.end()) { 00521 device_vector<uchar> *data = new device_vector<uchar>(); 00522 data->copy((uchar*)host, size); 00523 00524 mem_alloc(*data, MEM_READ_ONLY); 00525 const_mem_map[name] = data; 00526 } 00527 else { 00528 device_vector<uchar> *data = const_mem_map[name]; 00529 data->copy((uchar*)host, size); 00530 } 00531 00532 mem_copy_to(*const_mem_map[name]); 00533 } 00534 00535 void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) 00536 { 00537 mem_alloc(mem, MEM_READ_ONLY); 00538 mem_copy_to(mem); 00539 mem_map[name] = &mem; 00540 } 00541 00542 void tex_free(device_memory& mem) 00543 { 00544 if(mem.data_pointer) 00545 mem_free(mem); 00546 } 00547 00548 size_t global_size_round_up(int group_size, int global_size) 00549 { 00550 int r = global_size % group_size; 00551 return global_size + ((r == 0)? 0: group_size - r); 00552 } 00553 00554 void path_trace(DeviceTask& task) 00555 { 00556 /* cast arguments to cl types */ 00557 cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); 00558 cl_mem d_buffer = CL_MEM_PTR(task.buffer); 00559 cl_mem d_rng_state = CL_MEM_PTR(task.rng_state); 00560 cl_int d_x = task.x; 00561 cl_int d_y = task.y; 00562 cl_int d_w = task.w; 00563 cl_int d_h = task.h; 00564 cl_int d_sample = task.sample; 00565 cl_int d_offset = task.offset; 00566 cl_int d_stride = task.stride; 00567 00568 /* sample arguments */ 00569 int narg = 0; 00570 ciErr = 0; 00571 00572 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data); 00573 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); 00574 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state); 00575 00576 #define KERNEL_TEX(type, ttype, name) \ 00577 ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name); 00578 #include "kernel_textures.h" 00579 00580 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample); 00581 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x); 00582 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); 00583 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w); 00584 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h); 00585 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset); 00586 ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride); 00587 00588 opencl_assert(ciErr); 00589 00590 size_t workgroup_size; 00591 00592 clGetKernelWorkGroupInfo(ckPathTraceKernel, cdDevice, 00593 CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL); 00594 00595 workgroup_size = max(sqrt((double)workgroup_size), 1.0); 00596 00597 size_t local_size[2] = {workgroup_size, workgroup_size}; 00598 size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)}; 00599 00600 /* run kernel */ 00601 ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckPathTraceKernel, 2, NULL, global_size, local_size, 0, NULL, NULL); 00602 opencl_assert(ciErr); 00603 opencl_assert(clFinish(cqCommandQueue)); 00604 } 00605 00606 cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name) 00607 { 00608 cl_mem ptr; 00609 cl_int err = 0; 00610 00611 if(mem_map.find(name) != mem_map.end()) { 00612 device_memory *mem = mem_map[name]; 00613 00614 ptr = CL_MEM_PTR(mem->device_pointer); 00615 } 00616 else { 00617 /* work around NULL not working, even though the spec says otherwise */ 00618 ptr = CL_MEM_PTR(null_mem); 00619 } 00620 00621 err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr); 00622 opencl_assert(err); 00623 00624 return err; 00625 } 00626 00627 void tonemap(DeviceTask& task) 00628 { 00629 /* cast arguments to cl types */ 00630 cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); 00631 cl_mem d_rgba = CL_MEM_PTR(task.rgba); 00632 cl_mem d_buffer = CL_MEM_PTR(task.buffer); 00633 cl_int d_x = task.x; 00634 cl_int d_y = task.y; 00635 cl_int d_w = task.w; 00636 cl_int d_h = task.h; 00637 cl_int d_sample = task.sample; 00638 cl_int d_resolution = task.resolution; 00639 cl_int d_offset = task.offset; 00640 cl_int d_stride = task.stride; 00641 00642 /* sample arguments */ 00643 int narg = 0; 00644 ciErr = 0; 00645 00646 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data); 00647 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba); 00648 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); 00649 00650 #define KERNEL_TEX(type, ttype, name) \ 00651 ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name); 00652 #include "kernel_textures.h" 00653 00654 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample), (void*)&d_sample); 00655 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution); 00656 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x); 00657 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y); 00658 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w); 00659 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h); 00660 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset); 00661 ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride); 00662 00663 opencl_assert(ciErr); 00664 00665 size_t workgroup_size; 00666 00667 clGetKernelWorkGroupInfo(ckFilmConvertKernel, cdDevice, 00668 CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL); 00669 00670 workgroup_size = max(sqrt((double)workgroup_size), 1.0); 00671 00672 size_t local_size[2] = {workgroup_size, workgroup_size}; 00673 size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)}; 00674 00675 /* run kernel */ 00676 ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckFilmConvertKernel, 2, NULL, global_size, local_size, 0, NULL, NULL); 00677 opencl_assert(ciErr); 00678 opencl_assert(clFinish(cqCommandQueue)); 00679 } 00680 00681 void task_add(DeviceTask& maintask) 00682 { 00683 list<DeviceTask> tasks; 00684 00685 /* arbitrary limit to work around apple ATI opencl issue */ 00686 if(platform_name == "Apple") 00687 maintask.split_max_size(tasks, 76800); 00688 else 00689 tasks.push_back(maintask); 00690 00691 DeviceTask task; 00692 00693 foreach(DeviceTask& task, tasks) { 00694 if(task.type == DeviceTask::TONEMAP) 00695 tonemap(task); 00696 else if(task.type == DeviceTask::PATH_TRACE) 00697 path_trace(task); 00698 } 00699 } 00700 00701 void task_wait() 00702 { 00703 } 00704 00705 void task_cancel() 00706 { 00707 } 00708 }; 00709 00710 Device *device_opencl_create(DeviceInfo& info, bool background) 00711 { 00712 return new OpenCLDevice(info, background); 00713 } 00714 00715 void device_opencl_info(vector<DeviceInfo>& devices) 00716 { 00717 vector<cl_device_id> device_ids; 00718 cl_uint num_devices; 00719 cl_platform_id platform_id; 00720 cl_uint num_platforms; 00721 00722 /* get devices */ 00723 if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || num_platforms == 0) 00724 return; 00725 00726 if(clGetPlatformIDs(num_platforms, &platform_id, NULL) != CL_SUCCESS) 00727 return; 00728 00729 if(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices) != CL_SUCCESS) 00730 return; 00731 00732 device_ids.resize(num_devices); 00733 00734 if(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL) != CL_SUCCESS) 00735 return; 00736 00737 /* add devices */ 00738 for(int num = 0; num < num_devices; num++) { 00739 cl_device_id device_id = device_ids[num]; 00740 char name[1024]; 00741 00742 if(clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), &name, NULL) != CL_SUCCESS) 00743 continue; 00744 00745 DeviceInfo info; 00746 00747 info.type = DEVICE_OPENCL; 00748 info.description = string(name); 00749 info.id = string_printf("OPENCL_%d", num); 00750 info.num = num; 00751 /* we don't know if it's used for display, but assume it is */ 00752 info.display_device = true; 00753 00754 devices.push_back(info); 00755 } 00756 } 00757 00758 CCL_NAMESPACE_END 00759 00760 #endif /* WITH_OPENCL */ 00761