Do initial OpenCL device thread management a bit different

2025-07-24 07:28:37 +00:00 · 2017-10-19 14:54:03 +02:00 · 2017-10-19 14:54:03 +02:00 · 635b2bfd50
commit 635b2bfd50
parent f84abdef1c
2 changed files with 51 additions and 27 deletions
--- a/include/types.h
+++ b/include/types.h
@ -149,11 +149,14 @@ typedef enum event_identifier

 typedef enum amplifier_count
 {
-  KERNEL_BFS              = 1024,
-  KERNEL_COMBS            = 1024,
-  KERNEL_RULES            = 256,
-  KERNEL_THREADS_MAX      = 256,
-  KERNEL_THREADS_MAX_CPU  = 1
+  KERNEL_BFS                        = 1024,
+  KERNEL_COMBS                      = 1024,
+  KERNEL_RULES                      = 256,
+  KERNEL_THREADS_MAX_CPU            = 1,
+  KERNEL_THREADS_MAX_GPU            = 8,  // ex: intel integrated
+  KERNEL_THREADS_MAX_GPU_NV         = 32, // optimized NV  size: warps
+  KERNEL_THREADS_MAX_GPU_AMD        = 64, // optimized AMD size: wavefronts
+  KERNEL_THREADS_MAX_OTHER          = 8,  // ex: intel MIC

 } amplifier_count_t;

--- a/src/interface.c
+++ b/src/interface.c
@ -25076,41 +25076,62 @@ u32 hashconfig_forced_kernel_threads (hashcat_ctx_t *hashcat_ctx)

 u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param)
 {
-  // kernel_threads is 256 at this point
+  const user_options_t *user_options = hashcat_ctx->user_options;

-  u32 kernel_threads = KERNEL_THREADS_MAX;
+  // a kernel can force a fixed value

-  // a CPU doesn't have "opencl threads", this sets it to 1
+  const u32 forced_kernel_threads = hashconfig_forced_kernel_threads (hashcat_ctx);
+
+  if (forced_kernel_threads) return forced_kernel_threads;
+
+  // otherwise it depends on (1) the opencl device type
+
+  u32 kernel_threads = 0;

  if (device_param->device_type & CL_DEVICE_TYPE_CPU)
  {
    kernel_threads = KERNEL_THREADS_MAX_CPU;
  }
-
-  // but 64 for AMD GPU, they prefer it
-
-  if (device_param->device_vendor_id == VENDOR_ID_AMD)
+  else if (device_param->device_type & CL_DEVICE_TYPE_GPU)
  {
-    kernel_threads = MIN (kernel_threads, 64);
+    if (device_param->device_vendor_id == VENDOR_ID_NV)
+    {
+      kernel_threads = KERNEL_THREADS_MAX_GPU_NV;
+
+      switch (user_options->workload_profile)
+      {
+        case 1: kernel_threads *= 1; break;
+        case 2: kernel_threads *= 2; break;
+        case 3: kernel_threads *= 4; break;
+        case 4: kernel_threads *= 8; break;
+      }
+    }
+    else if (device_param->device_vendor_id == VENDOR_ID_AMD)
+    {
+      kernel_threads = KERNEL_THREADS_MAX_GPU_AMD;
+
+      switch (user_options->workload_profile)
+      {
+        case 1: kernel_threads *= 1; break;
+        case 2: kernel_threads *= 1; break;
+        case 3: kernel_threads *= 2; break;
+        case 4: kernel_threads *= 4; break;
+      }
+    }
+    else
+    {
+      kernel_threads = KERNEL_THREADS_MAX_GPU;
+    }
+  }
+  else
+  {
+    kernel_threads = KERNEL_THREADS_MAX_OTHER;
  }

-  // internal Intel GPU run at full speed 8
-
-  if ((strstr (device_param->device_vendor, "Intel")) && (device_param->device_type & CL_DEVICE_TYPE_GPU))
-  {
-    kernel_threads = MIN (kernel_threads, 8);
-  }
-
-  // a kernel can force an even lower value
+  // and (2) an opencl device can force an lower value (limited resources on device)

  kernel_threads = MIN (kernel_threads, device_param->device_maxworkgroup_size);

-  // a kernel can also force a fixed value
-
-  const u32 forced_kernel_threads = hashconfig_forced_kernel_threads (hashcat_ctx);
-
-  if (forced_kernel_threads) kernel_threads = forced_kernel_threads;
-
  return kernel_threads;
 }