|
|
|
@ -89,20 +89,27 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
u32 overhead = 0;
|
|
|
|
|
|
|
|
|
|
if (device_param->device_vendor_id == VENDOR_ID_NV)
|
|
|
|
|
if (user_options->kernel_threads_chgd == true)
|
|
|
|
|
{
|
|
|
|
|
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
|
|
|
|
|
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
|
|
|
|
|
// on my development system. no clue where the 4 bytes are spent.
|
|
|
|
|
// I did some research on this and it seems to be related to the datatype.
|
|
|
|
|
// For example, if I used u8 instead, there's only 1 byte wasted.
|
|
|
|
|
|
|
|
|
|
overhead = 4;
|
|
|
|
|
fixed_local_size = user_options->kernel_threads;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
u32 overhead = 0;
|
|
|
|
|
|
|
|
|
|
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
|
|
|
|
if (device_param->device_vendor_id == VENDOR_ID_NV)
|
|
|
|
|
{
|
|
|
|
|
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
|
|
|
|
|
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
|
|
|
|
|
// on my development system. no clue where the 4 bytes are spent.
|
|
|
|
|
// I did some research on this and it seems to be related to the datatype.
|
|
|
|
|
// For example, if I used u8 instead, there's only 1 byte wasted.
|
|
|
|
|
|
|
|
|
|
overhead = 4;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
|
|
|
|