From 041a77702507adc64e7ab799e24893d56a3e023c Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 24 Jan 2020 13:24:19 +0100 Subject: [PATCH] OpenCL Runtime: Unlocked maximum thread count for NVIDIA GPU --- docs/changes.txt | 2 +- src/backend.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index beb33943f..8df8cadeb 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -108,7 +108,7 @@ - OpenCL Runtime: Improve ROCm detection and make sure to not confuse with recent AMDGPU drivers - OpenCL Runtime: Not using amd_bytealign (amd_bitalign is fine) on AMDGPU driver drastically reduces JiT segfaults - OpenCL Runtime: Reenabled support for Intel GPU OpenCL runtime (Beignet and NEO) because a workaround was found (force -cl-std=CL2.0) -- OpenCL Runtime: Unlocked maximum thread count +- OpenCL Runtime: Unlocked maximum thread count for NVIDIA GPU - OpenCL Runtime: Update unstable mode warnings for Apple and AMDGPU drivers - OpenCL Runtime: Workaround JiT compiler error on AMDGPU driver compiling WPA-EAPOL-PBKDF2 OpenCL kernel - OpenCL Runtime: Workaround JiT compiler error on ROCm 2.3 driver if the 'inline' keyword is used in function declaration diff --git a/src/backend.c b/src/backend.c index 4961a773f..0f43a29d1 100644 --- a/src/backend.c +++ b/src/backend.c @@ -6722,14 +6722,31 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param) kernel_threads_max = MIN (kernel_threads_max, device_maxworkgroup_size); - // for CPU we just do 1 ... - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { + // for all CPU we just do 1 ... + const u32 cpu_prefered_thread_count = 1; kernel_threads_max = MIN (kernel_threads_max, cpu_prefered_thread_count); } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + // for GPU we need to distinguish by vendor + + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + const u32 gpu_prefered_thread_count = 8; + + kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count); + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + const u32 gpu_prefered_thread_count = 64; + + kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count); + } + } // this is intenionally! at this point, kernel_threads_min can be higher than kernel_threads_max. // in this case we actually want kernel_threads_min selected.