Blender V2.61 - r43446

device_opencl.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright 2011, Blender Foundation.
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License
00006  * as published by the Free Software Foundation; either version 2
00007  * of the License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software Foundation,
00016  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 #ifdef WITH_OPENCL
00020 
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 
00025 #include "device.h"
00026 #include "device_intern.h"
00027 
00028 #include "util_foreach.h"
00029 #include "util_map.h"
00030 #include "util_math.h"
00031 #include "util_md5.h"
00032 #include "util_opencl.h"
00033 #include "util_opengl.h"
00034 #include "util_path.h"
00035 #include "util_time.h"
00036 
00037 CCL_NAMESPACE_BEGIN
00038 
00039 #define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p))
00040 
00041 class OpenCLDevice : public Device
00042 {
00043 public:
00044     cl_context cxContext;
00045     cl_command_queue cqCommandQueue;
00046     cl_platform_id cpPlatform;
00047     cl_device_id cdDevice;
00048     cl_program cpProgram;
00049     cl_kernel ckPathTraceKernel;
00050     cl_kernel ckFilmConvertKernel;
00051     cl_int ciErr;
00052     map<string, device_vector<uchar>*> const_mem_map;
00053     map<string, device_memory*> mem_map;
00054     device_ptr null_mem;
00055     bool device_initialized;
00056     string platform_name;
00057 
00058     const char *opencl_error_string(cl_int err)
00059     {
00060         switch (err) {
00061             case CL_SUCCESS: return "Success!";
00062             case CL_DEVICE_NOT_FOUND: return "Device not found.";
00063             case CL_DEVICE_NOT_AVAILABLE: return "Device not available";
00064             case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available";
00065             case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure";
00066             case CL_OUT_OF_RESOURCES: return "Out of resources";
00067             case CL_OUT_OF_HOST_MEMORY: return "Out of host memory";
00068             case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available";
00069             case CL_MEM_COPY_OVERLAP: return "Memory copy overlap";
00070             case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch";
00071             case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported";
00072             case CL_BUILD_PROGRAM_FAILURE: return "Program build failure";
00073             case CL_MAP_FAILURE: return "Map failure";
00074             case CL_INVALID_VALUE: return "Invalid value";
00075             case CL_INVALID_DEVICE_TYPE: return "Invalid device type";
00076             case CL_INVALID_PLATFORM: return "Invalid platform";
00077             case CL_INVALID_DEVICE: return "Invalid device";
00078             case CL_INVALID_CONTEXT: return "Invalid context";
00079             case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties";
00080             case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue";
00081             case CL_INVALID_HOST_PTR: return "Invalid host pointer";
00082             case CL_INVALID_MEM_OBJECT: return "Invalid memory object";
00083             case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor";
00084             case CL_INVALID_IMAGE_SIZE: return "Invalid image size";
00085             case CL_INVALID_SAMPLER: return "Invalid sampler";
00086             case CL_INVALID_BINARY: return "Invalid binary";
00087             case CL_INVALID_BUILD_OPTIONS: return "Invalid build options";
00088             case CL_INVALID_PROGRAM: return "Invalid program";
00089             case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable";
00090             case CL_INVALID_KERNEL_NAME: return "Invalid kernel name";
00091             case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition";
00092             case CL_INVALID_KERNEL: return "Invalid kernel";
00093             case CL_INVALID_ARG_INDEX: return "Invalid argument index";
00094             case CL_INVALID_ARG_VALUE: return "Invalid argument value";
00095             case CL_INVALID_ARG_SIZE: return "Invalid argument size";
00096             case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments";
00097             case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension";
00098             case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size";
00099             case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size";
00100             case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset";
00101             case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list";
00102             case CL_INVALID_EVENT: return "Invalid event";
00103             case CL_INVALID_OPERATION: return "Invalid operation";
00104             case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object";
00105             case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size";
00106             case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level";
00107             default: return "Unknown";
00108         }
00109     }
00110 
00111     bool opencl_error(cl_int err)
00112     {
00113         if(err != CL_SUCCESS) {
00114             string message = string_printf("OpenCL error (%d): %s", err, opencl_error_string(err));
00115             if(error_msg == "")
00116                 error_msg = message;
00117             fprintf(stderr, "%s\n", message.c_str());
00118             return true;
00119         }
00120 
00121         return false;
00122     }
00123 
00124     void opencl_error(const string& message)
00125     {
00126         if(error_msg == "")
00127             error_msg = message;
00128         fprintf(stderr, "%s\n", message.c_str());
00129     }
00130 
00131     void opencl_assert(cl_int err)
00132     {
00133         if(err != CL_SUCCESS) {
00134             string message = string_printf("OpenCL error (%d): %s", err, opencl_error_string(err));
00135             if(error_msg == "")
00136                 error_msg = message;
00137             fprintf(stderr, "%s\n", message.c_str());
00138 #ifndef NDEBUG
00139             abort();
00140 #endif
00141         }
00142     }
00143 
00144     OpenCLDevice(DeviceInfo& info, bool background_)
00145     {
00146         background = background_;
00147         cpPlatform = NULL;
00148         cxContext = NULL;
00149         cqCommandQueue = NULL;
00150         cpProgram = NULL;
00151         ckPathTraceKernel = NULL;
00152         ckFilmConvertKernel = NULL;
00153         null_mem = 0;
00154         device_initialized = false;
00155 
00156         /* setup platform */
00157         cl_uint num_platforms;
00158 
00159         ciErr = clGetPlatformIDs(0, NULL, &num_platforms);
00160         if(opencl_error(ciErr))
00161             return;
00162 
00163         if(num_platforms == 0) {
00164             opencl_error("OpenCL: no platforms found.");
00165             return;
00166         }
00167 
00168         ciErr = clGetPlatformIDs(num_platforms, &cpPlatform, NULL);
00169         if(opencl_error(ciErr))
00170             return;
00171 
00172         char name[256];
00173         clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
00174         platform_name = name;
00175 
00176         /* get devices */
00177         vector<cl_device_id> device_ids;
00178         cl_uint num_devices;
00179 
00180         if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices)))
00181             return;
00182 
00183         if(info.num > num_devices) {
00184             if(num_devices == 0)
00185                 opencl_error("OpenCL: no devices found.");
00186             else
00187                 opencl_error("OpenCL: specified device not found.");
00188             return;
00189         }
00190 
00191         device_ids.resize(num_devices);
00192         
00193         if(opencl_error(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL)))
00194             return;
00195 
00196         cdDevice = device_ids[info.num];
00197 
00198         /* create context */
00199         cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
00200         if(opencl_error(ciErr))
00201             return;
00202 
00203         cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
00204         if(opencl_error(ciErr))
00205             return;
00206 
00207         null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
00208         device_initialized = true;
00209     }
00210 
00211     bool opencl_version_check()
00212     {
00213         char version[256];
00214 
00215         int major, minor, req_major = 1, req_minor = 1;
00216 
00217         clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
00218 
00219         if(sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
00220             opencl_error(string_printf("OpenCL: failed to parse platform version string (%s).", version));
00221             return false;
00222         }
00223 
00224         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
00225             opencl_error(string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor));
00226             return false;
00227         }
00228 
00229         clGetDeviceInfo(cdDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
00230 
00231         if(sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
00232             opencl_error(string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version));
00233             return false;
00234         }
00235 
00236         if(!((major == req_major && minor >= req_minor) || (major > req_major))) {
00237             opencl_error(string_printf("OpenCL: C version 1.1 or later required, found %d.%d", major, minor));
00238             return false;
00239         }
00240 
00241         /* we don't check CL_DEVICE_VERSION since for e.g. nvidia sm 1.3 cards this is
00242             1.0 even if the language features are there, just limited shared memory */
00243 
00244         return true;
00245     }
00246 
00247     bool load_binary(const string& kernel_path, const string& clbin)
00248     {
00249         /* read binary into memory */
00250         vector<uint8_t> binary;
00251 
00252         if(!path_read_binary(clbin, binary)) {
00253             opencl_error(string_printf("OpenCL failed to read cached binary %s.", clbin.c_str()));
00254             return false;
00255         }
00256 
00257         /* create program */
00258         cl_int status;
00259         size_t size = binary.size();
00260         const uint8_t *bytes = &binary[0];
00261 
00262         cpProgram = clCreateProgramWithBinary(cxContext, 1, &cdDevice,
00263             &size, &bytes, &status, &ciErr);
00264 
00265         if(opencl_error(status) || opencl_error(ciErr)) {
00266             opencl_error(string_printf("OpenCL failed create program from cached binary %s.", clbin.c_str()));
00267             return false;
00268         }
00269 
00270         if(!build_kernel(kernel_path))
00271             return false;
00272 
00273         return true;
00274     }
00275 
00276     bool save_binary(const string& clbin)
00277     {
00278         size_t size = 0;
00279         clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL);
00280 
00281         if(!size)
00282             return false;
00283 
00284         vector<uint8_t> binary(size);
00285         uint8_t *bytes = &binary[0];
00286 
00287         clGetProgramInfo(cpProgram, CL_PROGRAM_BINARIES, sizeof(uint8_t*), &bytes, NULL);
00288 
00289         if(!path_write_binary(clbin, binary)) {
00290             opencl_error(string_printf("OpenCL failed to write cached binary %s.", clbin.c_str()));
00291             return false;
00292         }
00293 
00294         return true;
00295     }
00296 
00297     string kernel_build_options()
00298     {
00299         string build_options = " -cl-fast-relaxed-math ";
00300         
00301         /* full shading only on NVIDIA cards at the moment */
00302         if(platform_name == "NVIDIA CUDA")
00303             build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
00304         if(platform_name == "Apple")
00305             build_options += " -D__CL_NO_FLOAT3__ ";
00306 
00307         return build_options;
00308     }
00309 
00310     bool build_kernel(const string& kernel_path)
00311     {
00312         string build_options = kernel_build_options();
00313     
00314         ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL);
00315 
00316         if(ciErr != CL_SUCCESS) {
00317             /* show build errors */
00318             char *build_log;
00319             size_t ret_val_size;
00320 
00321             clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
00322 
00323             build_log = new char[ret_val_size+1];
00324             clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
00325 
00326             build_log[ret_val_size] = '\0';
00327             opencl_error("OpenCL build failed: errors in console");
00328             fprintf(stderr, "%s\n", build_log);
00329 
00330             delete[] build_log;
00331 
00332             return false;
00333         }
00334 
00335         return true;
00336     }
00337 
00338     bool compile_kernel(const string& kernel_path, const string& kernel_md5)
00339     {
00340         /* we compile kernels consisting of many files. unfortunately opencl
00341            kernel caches do not seem to recognize changes in included files.
00342            so we force recompile on changes by adding the md5 hash of all files */
00343         string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
00344         source = path_source_replace_includes(source, kernel_path);
00345 
00346         size_t source_len = source.size();
00347         const char *source_str = source.c_str();
00348 
00349         cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr);
00350 
00351         if(opencl_error(ciErr))
00352             return false;
00353 
00354         double starttime = time_dt();
00355         printf("Compiling OpenCL kernel ...\n");
00356 
00357         if(!build_kernel(kernel_path))
00358             return false;
00359 
00360         printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
00361 
00362         return true;
00363     }
00364 
00365     string device_md5_hash()
00366     {
00367         MD5Hash md5;
00368         char version[256], driver[256], name[256], vendor[256];
00369 
00370         clGetPlatformInfo(cpPlatform, CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL);
00371         clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
00372         clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL);
00373         clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION, sizeof(driver), &driver, NULL);
00374 
00375         md5.append((uint8_t*)vendor, strlen(vendor));
00376         md5.append((uint8_t*)version, strlen(version));
00377         md5.append((uint8_t*)name, strlen(name));
00378         md5.append((uint8_t*)driver, strlen(driver));
00379 
00380         string options = kernel_build_options();
00381         md5.append((uint8_t*)options.c_str(), options.size());
00382 
00383         return md5.get_hex();
00384     }
00385 
00386     bool load_kernels(bool experimental)
00387     {
00388         /* verify if device was initialized */
00389         if(!device_initialized) {
00390             fprintf(stderr, "OpenCL: failed to initialize device.\n");
00391             return false;
00392         }
00393 
00394         /* verify we have right opencl version */
00395         if(!opencl_version_check())
00396             return false;
00397 
00398         /* md5 hash to detect changes */
00399         string kernel_path = path_get("kernel");
00400         string kernel_md5 = path_files_md5_hash(kernel_path);
00401         string device_md5 = device_md5_hash();
00402 
00403         /* try to use cache binary */
00404         string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());;
00405         clbin = path_user_get(path_join("cache", clbin));
00406 
00407         if(path_exists(clbin)) {
00408             /* if exists already, try use it */
00409             if(!load_binary(kernel_path, clbin))
00410                 return false;
00411         }
00412         else {
00413             /* compile kernel */
00414             if(!compile_kernel(kernel_path, kernel_md5))
00415                 return false;
00416 
00417             /* save binary for reuse */
00418             save_binary(clbin);
00419         }
00420 
00421         /* find kernels */
00422         ckPathTraceKernel = clCreateKernel(cpProgram, "kernel_ocl_path_trace", &ciErr);
00423         if(opencl_error(ciErr))
00424             return false;
00425 
00426         ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
00427         if(opencl_error(ciErr))
00428             return false;
00429 
00430         return true;
00431     }
00432 
00433     ~OpenCLDevice()
00434     {
00435         if(null_mem)
00436             clReleaseMemObject(CL_MEM_PTR(null_mem));
00437 
00438         map<string, device_vector<uchar>*>::iterator mt;
00439         for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
00440             mem_free(*(mt->second));
00441             delete mt->second;
00442         }
00443 
00444         if(ckPathTraceKernel)
00445             clReleaseKernel(ckPathTraceKernel);  
00446         if(ckFilmConvertKernel)
00447             clReleaseKernel(ckFilmConvertKernel);  
00448         if(cpProgram)
00449             clReleaseProgram(cpProgram);
00450         if(cqCommandQueue)
00451             clReleaseCommandQueue(cqCommandQueue);
00452         if(cxContext)
00453             clReleaseContext(cxContext);
00454     }
00455 
00456     bool support_full_kernel()
00457     {
00458         return false;
00459     }
00460 
00461     string description()
00462     {
00463         char name[1024];
00464 
00465         clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL);
00466 
00467         return string("OpenCL ") + name;
00468     }
00469 
00470     void mem_alloc(device_memory& mem, MemoryType type)
00471     {
00472         size_t size = mem.memory_size();
00473 
00474         if(type == MEM_READ_ONLY)
00475             mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, size, NULL, &ciErr);
00476         else if(type == MEM_WRITE_ONLY)
00477             mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_WRITE_ONLY, size, NULL, &ciErr);
00478         else
00479             mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_WRITE, size, NULL, &ciErr);
00480 
00481         opencl_assert(ciErr);
00482     }
00483 
00484     void mem_copy_to(device_memory& mem)
00485     {
00486         /* this is blocking */
00487         size_t size = mem.memory_size();
00488         ciErr = clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL);
00489         opencl_assert(ciErr);
00490     }
00491 
00492     void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
00493     {
00494         size_t offset = elem*y*w;
00495         size_t size = elem*w*h;
00496 
00497         ciErr = clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL);
00498         opencl_assert(ciErr);
00499     }
00500 
00501     void mem_zero(device_memory& mem)
00502     {
00503         if(mem.device_pointer) {
00504             memset((void*)mem.data_pointer, 0, mem.memory_size());
00505             mem_copy_to(mem);
00506         }
00507     }
00508 
00509     void mem_free(device_memory& mem)
00510     {
00511         if(mem.device_pointer) {
00512             ciErr = clReleaseMemObject(CL_MEM_PTR(mem.device_pointer));
00513             mem.device_pointer = 0;
00514             opencl_assert(ciErr);
00515         }
00516     }
00517 
00518     void const_copy_to(const char *name, void *host, size_t size)
00519     {
00520         if(const_mem_map.find(name) == const_mem_map.end()) {
00521             device_vector<uchar> *data = new device_vector<uchar>();
00522             data->copy((uchar*)host, size);
00523 
00524             mem_alloc(*data, MEM_READ_ONLY);
00525             const_mem_map[name] = data;
00526         }
00527         else {
00528             device_vector<uchar> *data = const_mem_map[name];
00529             data->copy((uchar*)host, size);
00530         }
00531 
00532         mem_copy_to(*const_mem_map[name]);
00533     }
00534 
00535     void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
00536     {
00537         mem_alloc(mem, MEM_READ_ONLY);
00538         mem_copy_to(mem);
00539         mem_map[name] = &mem;
00540     }
00541 
00542     void tex_free(device_memory& mem)
00543     {
00544         if(mem.data_pointer)
00545             mem_free(mem);
00546     }
00547 
00548     size_t global_size_round_up(int group_size, int global_size)
00549     {
00550         int r = global_size % group_size;
00551         return global_size + ((r == 0)? 0: group_size - r);
00552     }
00553 
00554     void path_trace(DeviceTask& task)
00555     {
00556         /* cast arguments to cl types */
00557         cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
00558         cl_mem d_buffer = CL_MEM_PTR(task.buffer);
00559         cl_mem d_rng_state = CL_MEM_PTR(task.rng_state);
00560         cl_int d_x = task.x;
00561         cl_int d_y = task.y;
00562         cl_int d_w = task.w;
00563         cl_int d_h = task.h;
00564         cl_int d_sample = task.sample;
00565         cl_int d_offset = task.offset;
00566         cl_int d_stride = task.stride;
00567 
00568         /* sample arguments */
00569         int narg = 0;
00570         ciErr = 0;
00571 
00572         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
00573         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
00574         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);
00575 
00576 #define KERNEL_TEX(type, ttype, name) \
00577     ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
00578 #include "kernel_textures.h"
00579 
00580         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample);
00581         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
00582         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
00583         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w);
00584         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h);
00585         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset);
00586         ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride);
00587 
00588         opencl_assert(ciErr);
00589 
00590         size_t workgroup_size;
00591 
00592         clGetKernelWorkGroupInfo(ckPathTraceKernel, cdDevice,
00593             CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
00594     
00595         workgroup_size = max(sqrt((double)workgroup_size), 1.0);
00596 
00597         size_t local_size[2] = {workgroup_size, workgroup_size};
00598         size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};
00599 
00600         /* run kernel */
00601         ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckPathTraceKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
00602         opencl_assert(ciErr);
00603         opencl_assert(clFinish(cqCommandQueue));
00604     }
00605 
00606     cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
00607     {
00608         cl_mem ptr;
00609         cl_int err = 0;
00610 
00611         if(mem_map.find(name) != mem_map.end()) {
00612             device_memory *mem = mem_map[name];
00613         
00614             ptr = CL_MEM_PTR(mem->device_pointer);
00615         }
00616         else {
00617             /* work around NULL not working, even though the spec says otherwise */
00618             ptr = CL_MEM_PTR(null_mem);
00619         }
00620         
00621         err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
00622         opencl_assert(err);
00623 
00624         return err;
00625     }
00626 
00627     void tonemap(DeviceTask& task)
00628     {
00629         /* cast arguments to cl types */
00630         cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
00631         cl_mem d_rgba = CL_MEM_PTR(task.rgba);
00632         cl_mem d_buffer = CL_MEM_PTR(task.buffer);
00633         cl_int d_x = task.x;
00634         cl_int d_y = task.y;
00635         cl_int d_w = task.w;
00636         cl_int d_h = task.h;
00637         cl_int d_sample = task.sample;
00638         cl_int d_resolution = task.resolution;
00639         cl_int d_offset = task.offset;
00640         cl_int d_stride = task.stride;
00641 
00642         /* sample arguments */
00643         int narg = 0;
00644         ciErr = 0;
00645 
00646         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
00647         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
00648         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
00649 
00650 #define KERNEL_TEX(type, ttype, name) \
00651     ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
00652 #include "kernel_textures.h"
00653 
00654         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample), (void*)&d_sample);
00655         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution);
00656         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
00657         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y);
00658         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w);
00659         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h);
00660         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset);
00661         ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride);
00662 
00663         opencl_assert(ciErr);
00664 
00665         size_t workgroup_size;
00666 
00667         clGetKernelWorkGroupInfo(ckFilmConvertKernel, cdDevice,
00668             CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
00669     
00670         workgroup_size = max(sqrt((double)workgroup_size), 1.0);
00671 
00672         size_t local_size[2] = {workgroup_size, workgroup_size};
00673         size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};
00674 
00675         /* run kernel */
00676         ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckFilmConvertKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
00677         opencl_assert(ciErr);
00678         opencl_assert(clFinish(cqCommandQueue));
00679     }
00680 
00681     void task_add(DeviceTask& maintask)
00682     {
00683         list<DeviceTask> tasks;
00684 
00685         /* arbitrary limit to work around apple ATI opencl issue */
00686         if(platform_name == "Apple")
00687             maintask.split_max_size(tasks, 76800);
00688         else
00689             tasks.push_back(maintask);
00690 
00691         DeviceTask task;
00692 
00693         foreach(DeviceTask& task, tasks) {
00694             if(task.type == DeviceTask::TONEMAP)
00695                 tonemap(task);
00696             else if(task.type == DeviceTask::PATH_TRACE)
00697                 path_trace(task);
00698         }
00699     }
00700 
00701     void task_wait()
00702     {
00703     }
00704 
00705     void task_cancel()
00706     {
00707     }
00708 };
00709 
00710 Device *device_opencl_create(DeviceInfo& info, bool background)
00711 {
00712     return new OpenCLDevice(info, background);
00713 }
00714 
00715 void device_opencl_info(vector<DeviceInfo>& devices)
00716 {
00717     vector<cl_device_id> device_ids;
00718     cl_uint num_devices;
00719     cl_platform_id platform_id;
00720     cl_uint num_platforms;
00721 
00722     /* get devices */
00723     if(clGetPlatformIDs(0, NULL, &num_platforms) != CL_SUCCESS || num_platforms == 0)
00724         return;
00725 
00726     if(clGetPlatformIDs(num_platforms, &platform_id, NULL) != CL_SUCCESS)
00727         return;
00728 
00729     if(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &num_devices) != CL_SUCCESS)
00730         return;
00731     
00732     device_ids.resize(num_devices);
00733 
00734     if(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR, num_devices, &device_ids[0], NULL) != CL_SUCCESS)
00735         return;
00736     
00737     /* add devices */
00738     for(int num = 0; num < num_devices; num++) {
00739         cl_device_id device_id = device_ids[num];
00740         char name[1024];
00741 
00742         if(clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), &name, NULL) != CL_SUCCESS)
00743             continue;
00744 
00745         DeviceInfo info;
00746 
00747         info.type = DEVICE_OPENCL;
00748         info.description = string(name);
00749         info.id = string_printf("OPENCL_%d", num);
00750         info.num = num;
00751         /* we don't know if it's used for display, but assume it is */
00752         info.display_device = true;
00753 
00754         devices.push_back(info);
00755     }
00756 }
00757 
00758 CCL_NAMESPACE_END
00759 
00760 #endif /* WITH_OPENCL */
00761