From 635b2bfd50ad6e9ecc160eb233d9969a12d430b5 Mon Sep 17 00:00:00 2001 From: jsteube Date: Thu, 19 Oct 2017 14:54:03 +0200 Subject: [PATCH] Do initial OpenCL device thread management a bit different --- include/types.h | 13 ++++++---- src/interface.c | 65 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/include/types.h b/include/types.h index 2f8304aec..de0f4a179 100644 --- a/include/types.h +++ b/include/types.h @@ -149,11 +149,14 @@ typedef enum event_identifier typedef enum amplifier_count { - KERNEL_BFS = 1024, - KERNEL_COMBS = 1024, - KERNEL_RULES = 256, - KERNEL_THREADS_MAX = 256, - KERNEL_THREADS_MAX_CPU = 1 + KERNEL_BFS = 1024, + KERNEL_COMBS = 1024, + KERNEL_RULES = 256, + KERNEL_THREADS_MAX_CPU = 1, + KERNEL_THREADS_MAX_GPU = 8, // ex: intel integrated + KERNEL_THREADS_MAX_GPU_NV = 32, // optimized NV size: warps + KERNEL_THREADS_MAX_GPU_AMD = 64, // optimized AMD size: wavefronts + KERNEL_THREADS_MAX_OTHER = 8, // ex: intel MIC } amplifier_count_t; diff --git a/src/interface.c b/src/interface.c index 2a96a241c..b737ecbfa 100644 --- a/src/interface.c +++ b/src/interface.c @@ -25076,41 +25076,62 @@ u32 hashconfig_forced_kernel_threads (hashcat_ctx_t *hashcat_ctx) u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param) { - // kernel_threads is 256 at this point + const user_options_t *user_options = hashcat_ctx->user_options; - u32 kernel_threads = KERNEL_THREADS_MAX; + // a kernel can force a fixed value - // a CPU doesn't have "opencl threads", this sets it to 1 + const u32 forced_kernel_threads = hashconfig_forced_kernel_threads (hashcat_ctx); + + if (forced_kernel_threads) return forced_kernel_threads; + + // otherwise it depends on (1) the opencl device type + + u32 kernel_threads = 0; if (device_param->device_type & CL_DEVICE_TYPE_CPU) { kernel_threads = KERNEL_THREADS_MAX_CPU; } - - // but 64 for AMD GPU, they prefer it - - if 
(device_param->device_vendor_id == VENDOR_ID_AMD) + else if (device_param->device_type & CL_DEVICE_TYPE_GPU) { - kernel_threads = MIN (kernel_threads, 64); + if (device_param->device_vendor_id == VENDOR_ID_NV) + { + kernel_threads = KERNEL_THREADS_MAX_GPU_NV; + + switch (user_options->workload_profile) + { + case 1: kernel_threads *= 1; break; + case 2: kernel_threads *= 2; break; + case 3: kernel_threads *= 4; break; + case 4: kernel_threads *= 8; break; + } + } + else if (device_param->device_vendor_id == VENDOR_ID_AMD) + { + kernel_threads = KERNEL_THREADS_MAX_GPU_AMD; + + switch (user_options->workload_profile) + { + case 1: kernel_threads *= 1; break; + case 2: kernel_threads *= 1; break; + case 3: kernel_threads *= 2; break; + case 4: kernel_threads *= 4; break; + } + } + else + { + kernel_threads = KERNEL_THREADS_MAX_GPU; + } + } + else + { + kernel_threads = KERNEL_THREADS_MAX_OTHER; } - // internal Intel GPU run at full speed 8 - - if ((strstr (device_param->device_vendor, "Intel")) && (device_param->device_type & CL_DEVICE_TYPE_GPU)) - { - kernel_threads = MIN (kernel_threads, 8); - } - - // a kernel can force an even lower value + // and (2) an opencl device can force a lower value (limited resources on device) kernel_threads = MIN (kernel_threads, device_param->device_maxworkgroup_size); - // a kernel can also force a fixed value - - const u32 forced_kernel_threads = hashconfig_forced_kernel_threads (hashcat_ctx); - - if (forced_kernel_threads) kernel_threads = forced_kernel_threads; - return kernel_threads; }