1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-12-23 07:08:19 +00:00

OpenCL Runtime: Unlocked maximum thread count for NVIDIA GPU

This commit is contained in:
Jens Steube 2020-01-24 13:24:19 +01:00
parent ccacc508cb
commit 041a777025
2 changed files with 20 additions and 3 deletions

View File

@ -108,7 +108,7 @@
- OpenCL Runtime: Improve ROCm detection and make sure to not confuse with recent AMDGPU drivers
- OpenCL Runtime: Not using amd_bytealign (amd_bitalign is fine) on AMDGPU driver drastically reduces JiT segfaults
- OpenCL Runtime: Reenabled support for Intel GPU OpenCL runtime (Beignet and NEO) because a workaround was found (force -cl-std=CL2.0)
- OpenCL Runtime: Unlocked maximum thread count
- OpenCL Runtime: Unlocked maximum thread count for NVIDIA GPU
- OpenCL Runtime: Update unstable mode warnings for Apple and AMDGPU drivers
- OpenCL Runtime: Workaround JiT compiler error on AMDGPU driver compiling WPA-EAPOL-PBKDF2 OpenCL kernel
- OpenCL Runtime: Workaround JiT compiler error on ROCm 2.3 driver if the 'inline' keyword is used in function declaration

View File

@ -6722,14 +6722,31 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
kernel_threads_max = MIN (kernel_threads_max, device_maxworkgroup_size);
// for CPU we just do 1 ...
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
// for all CPU we just do 1 ...
const u32 cpu_prefered_thread_count = 1;
kernel_threads_max = MIN (kernel_threads_max, cpu_prefered_thread_count);
}
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
// for GPU we need to distinguish by vendor
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
{
const u32 gpu_prefered_thread_count = 8;
kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
}
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{
const u32 gpu_prefered_thread_count = 64;
kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count);
}
}
// this is intentional! at this point, kernel_threads_min can be higher than kernel_threads_max.
// in this case we actually want kernel_threads_min selected.