Merge pull request #199 from gm4tr1x/fillBuffer

Handling possible clEnqueueFillBuffer not found in opencl loader
pull/200/head
Jens Steube 8 years ago
commit d552e5d93c

@ -128,7 +128,7 @@ void hc_clReleaseProgram (OCL_PTR *ocl, cl_program program);
void hc_clSetKernelArg (OCL_PTR *ocl, cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
void *hc_clEnqueueMapBuffer (OCL_PTR *ocl, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
void hc_clEnqueueUnmapMemObject (OCL_PTR *ocl, cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
void hc_clEnqueueFillBuffer (OCL_PTR *ocl, cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
cl_int hc_clEnqueueFillBuffer (OCL_PTR *ocl, cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
void hc_clGetKernelWorkGroupInfo (OCL_PTR *ocl, cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
cl_int hc_clGetProgramBuildInfo (OCL_PTR *ocl, cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
void hc_clGetProgramInfo (OCL_PTR *ocl, cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t * param_value_size_ret);

@ -62,13 +62,15 @@
#define HC_LOAD_FUNC(ptr,name,type,libname,noerr) \
ptr->name = (type) hc_dlsym (ptr->lib, #name); \
if (!ptr->name) { \
if (noerr == 1) { \
log_error ("ERROR: %s is missing from %s shared library.", #name, #libname); \
exit (-1); \
} else { \
log_info ("WARNING: %s is missing from %s shared library.", #name, #libname); \
return (-1); \
if (noerr != -1) { \
if (!ptr->name) { \
if (noerr == 1) { \
log_error ("ERROR: %s is missing from %s shared library.", #name, #libname); \
exit (-1); \
} else { \
log_info ("WARNING: %s is missing from %s shared library.", #name, #libname); \
return (-1); \
} \
} \
}

@ -907,6 +907,8 @@ struct __hc_device_param
char *device_version;
char *driver_version;
bool opencl_v12;
cl_uint vendor_id;
cl_kernel kernel1;

@ -90,7 +90,7 @@ int ocl_init (OCL_PTR *ocl)
HC_LOAD_FUNC(ocl, clCreateProgramWithBinary, OCL_CLCREATEPROGRAMWITHBINARY, OpenCL, 1)
HC_LOAD_FUNC(ocl, clCreateProgramWithSource, OCL_CLCREATEPROGRAMWITHSOURCE, OpenCL, 1)
HC_LOAD_FUNC(ocl, clEnqueueCopyBuffer, OCL_CLENQUEUECOPYBUFFER, OpenCL, 1)
HC_LOAD_FUNC(ocl, clEnqueueFillBuffer, OCL_CLENQUEUEFILLBUFFER, OpenCL, 1)
HC_LOAD_FUNC(ocl, clEnqueueFillBuffer, OCL_CLENQUEUEFILLBUFFER, OpenCL, -1)
HC_LOAD_FUNC(ocl, clEnqueueMapBuffer, OCL_CLENQUEUEMAPBUFFER, OpenCL, 1)
HC_LOAD_FUNC(ocl, clEnqueueNDRangeKernel, OCL_CLENQUEUENDRANGEKERNEL, OpenCL, 1)
HC_LOAD_FUNC(ocl, clEnqueueReadBuffer, OCL_CLENQUEUEREADBUFFER, OpenCL, 1)
@ -509,16 +509,19 @@ void hc_clEnqueueUnmapMemObject (OCL_PTR *ocl, cl_command_queue command_queue, c
}
}
void hc_clEnqueueFillBuffer (OCL_PTR *ocl, cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
cl_int hc_clEnqueueFillBuffer (OCL_PTR *ocl, cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event)
{
cl_int CL_err = ocl->clEnqueueFillBuffer (command_queue, buffer, pattern, pattern_size, offset, size, num_events_in_wait_list, event_wait_list, event);
cl_int CL_err = -1;
if (CL_err != CL_SUCCESS)
if (ocl->clEnqueueFillBuffer)
{
log_error ("ERROR: %s : %d : %s\n", "clEnqueueFillBuffer()", CL_err, val2cstr_cl (CL_err));
CL_err = ocl->clEnqueueFillBuffer (command_queue, buffer, pattern, pattern_size, offset, size, num_events_in_wait_list, event_wait_list, event);
exit (-1);
if (CL_err != CL_SUCCESS && data.quiet == 0)
log_error ("WARNING: %s : %d : %s\n", "clEnqueueFillBuffer()", CL_err, val2cstr_cl (CL_err));
}
return CL_err;
}
void hc_clGetKernelWorkGroupInfo (OCL_PTR *ocl, cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)

@ -2587,15 +2587,18 @@ static void run_kernel_amp (hc_device_param_t *device_param, const uint num)
static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const uint size)
{
if (device_param->vendor_id == VENDOR_ID_AMD)
int rc = -1;
if (device_param->opencl_v12 && device_param->vendor_id == VENDOR_ID_AMD)
{
// So far tested, amd is the only supporting this OpenCL 1.2 function without segfaulting
const cl_uchar zero = 0;
hc_clEnqueueFillBuffer (data.ocl, device_param->command_queue, buf, &zero, sizeof (cl_uchar), 0, size, 0, NULL, NULL);
rc = hc_clEnqueueFillBuffer (data.ocl, device_param->command_queue, buf, &zero, sizeof (cl_uchar), 0, size, 0, NULL, NULL);
}
else
if (rc != 0)
{
// NOTE: clEnqueueFillBuffer () always fails with -59
// IOW, it's not supported by Nvidia ForceWare <= 352.21, also pocl segfaults, also on apple
@ -12659,6 +12662,8 @@ int main (int argc, char **argv)
for (uint platform_devices_id = 0; platform_devices_id < platform_devices_cnt; platform_devices_id++)
{
size_t param_value_size = 0;
const uint device_id = devices_cnt;
hc_device_param_t *device_param = &data.devices_param[device_id];
@ -12689,20 +12694,36 @@ int main (int argc, char **argv)
// device_name
char *device_name = (char *) mymalloc (INFOSZ);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_NAME, 0, NULL, &param_value_size);
char *device_name = (char *) mymalloc (param_value_size);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_NAME, INFOSZ, device_name, NULL);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_NAME, param_value_size, device_name, NULL);
device_param->device_name = device_name;
// device_version
char *device_version = (char *) mymalloc (INFOSZ);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VERSION, 0, NULL, &param_value_size);
char *device_version = (char *) mymalloc (param_value_size);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VERSION, INFOSZ, device_version, NULL);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_VERSION, param_value_size, device_version, NULL);
device_param->device_version = device_version;
// device_opencl_version
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &param_value_size);
char *device_opencl_version = (char *) mymalloc (param_value_size);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, device_opencl_version, NULL);
device_param->opencl_v12 = device_opencl_version[9] > '1' || device_opencl_version[11] >= '2';
myfree (device_opencl_version);
if (strstr (device_version, "pocl"))
{
// pocl returns the real vendor_id in CL_DEVICE_VENDOR_ID which causes many problems because of hms and missing amd_bfe () etc
@ -12791,10 +12812,11 @@ int main (int argc, char **argv)
device_param->skipped = (skipped1 || skipped2);
// driver_version
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DRIVER_VERSION, 0, NULL, &param_value_size);
char *driver_version = (char *) mymalloc (INFOSZ);
char *driver_version = (char *) mymalloc (param_value_size);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DRIVER_VERSION, INFOSZ, driver_version, NULL);
hc_clGetDeviceInfo (data.ocl, device_param->device, CL_DRIVER_VERSION, param_value_size, driver_version, NULL);
device_param->driver_version = driver_version;

Loading…
Cancel
Save