Workaround missing clEnqueueFillBuffer() support in NVidia's OpenCL runtime

pull/30/head
jsteube 9 years ago
parent 32b8684d75
commit c8f7b7b5d3

@ -2457,16 +2457,37 @@ static void run_kernel_amp (hc_device_param_t *device_param, const uint num)
static void run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const uint size)
{
// not supported with Nvidia
// hc_clEnqueueFillBuffer (device_param->command_queue, buf, &zero, sizeof (cl_uchar), 0, size, 0, NULL, NULL);
if (data.vendor_id == VENDOR_ID_AMD)
{
const cl_uchar zero = 0;
hc_clEnqueueFillBuffer (device_param->command_queue, buf, &zero, sizeof (cl_uchar), 0, size, 0, NULL, NULL);
}
char *tmp = (char *) mymalloc (size);
if (data.vendor_id == VENDOR_ID_NV)
{
// NOTE: clEnqueueFillBuffer () always fails with -59
// IOW, it's not supported by Nvidia ForceWare <= 352.21,
// How's that possible, OpenCL 1.2 support is advertised??
// We need to workaround...
#define FILLSZ 0x100000
memset (tmp, 0, size);
char *tmp = (char *) mymalloc (FILLSZ);
hc_clEnqueueWriteBuffer (device_param->command_queue, buf, CL_TRUE, 0, size, tmp, 0, NULL, NULL);
memset (tmp, 0, FILLSZ);
free (tmp);
for (uint i = 0; i < size; i += FILLSZ)
{
const int left = size - i;
const int fillsz = MIN (FILLSZ, left);
hc_clEnqueueWriteBuffer (device_param->command_queue, buf, CL_TRUE, i, fillsz, tmp, 0, NULL, NULL);
}
myfree (tmp);
}
}
static int run_rule_engine (const int rule_len, const char *rule_buf)

Loading…
Cancel
Save