From 3d2c0018fc6a35a42aa4fcfa295a0085d57288a6 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 3 Feb 2018 12:28:00 +0100 Subject: [PATCH] Limit the maximum OpenCL thread count to 256 If an OpenCL device allows a very high thread count (for example 1024 on nvidia), the host memory required is 32 times as high as with 32 threads (it jumps from a 128MB to a 4GB device memory requirement). since there's no device with that much device memory (because of the 1/4 memory rule), kernel_accel_max has to be limited to a very low number because the pws buffer would be too large otherwise. therefore autotune will be unable to calculate a good kernel_accel multiplier. currently there's no known OpenCL device that achieves better performance with 1024 threads than with 256. as a result, we limit the number of threads to 256, which turns out to be a generally good value. --- src/interface.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/interface.c b/src/interface.c index c73e8628a..e80f24caf 100644 --- a/src/interface.c +++ b/src/interface.c @@ -26017,6 +26017,16 @@ u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_p kernel_threads = MIN (kernel_threads, device_param->device_maxworkgroup_size); + // and (3) if an OpenCL device allows a very high thread count (for example 1024 on nvidia), + // the host memory required is 32 times as high as with 32 threads (it jumps from a 128MB to a 4GB device memory requirement). + // since there's no device with that much device memory (because of the 1/4 memory rule), kernel_accel_max + // has to be limited to a very low number because the pws buffer would be too large otherwise. + // therefore autotune will be unable to calculate a good kernel_accel multiplier. + // currently there's no known OpenCL device that achieves better performance with 1024 threads than with 256. + // as a result, we limit the number of threads to 256, which turns out to be a generally good value. 
+ + kernel_threads = MIN (kernel_threads, 256); + return kernel_threads; }