From b2c308d403aa067b23ae511262e34bf6a9c1191f Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 24 Jul 2025 22:19:58 +0200
Subject: [PATCH] Add runtime OpenCL version detection and discrete Intel GPU
 tuning

Detect the OpenCL version reported by the platform at runtime and pass
the matching -cl-std= flag when compiling kernels. Versions 1.1, 1.2,
2.0 and 3.0 are recognized; the former 2.1 check is replaced by 3.0.
This improves compatibility with the Intel NEO driver. Note: behavior
is untested on other platforms (NVIDIA, AMD, Apple, etc.); feedback
will be monitored.

Add tuningdb entries for discrete Intel GPUs. Copy over hash-mode
patterns that benefit from vectorizing on scalar compute units, based
on existing AMD and NVIDIA entries. This change also removes the
artificial thread limit previously enforced for discrete Intel GPUs.

Disable automatic vector width detection from the OpenCL runtime
except on CPU, where it remains in use.
---
 include/types.h                |  3 +-
 src/backend.c                  | 73 ++++++++++++++++++++++------------
 tunings/Alias.hctune           | 20 ++++++++++
 tunings/Modules_default.hctune | 43 ++++++++++++++++++++
 4 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/include/types.h b/include/types.h
index 32a5ae7a4..d4baddbbd 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1518,9 +1518,10 @@ typedef struct hc_device_param
 
   // Some more attributes
 
+  bool    use_opencl11;
   bool    use_opencl12;
   bool    use_opencl20;
-  bool    use_opencl21;
+  bool    use_opencl30;
 
   // AMD
   bool    has_vadd;
diff --git a/src/backend.c b/src/backend.c
index afdf3cc49..594ab4656 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5634,9 +5634,10 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt
       device_param->is_metal  = false;
       device_param->is_opencl = false;
 
+      device_param->use_opencl11 = false;
       device_param->use_opencl12 = false;
       device_param->use_opencl20 = false;
-      device_param->use_opencl21 = false;
+      device_param->use_opencl30 = false;
 
       // device_name
 
@@ -6111,9 +6112,10 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
       device_param->is_metal  = false;
       device_param->is_opencl = false;
 
+      device_param->use_opencl11 = false;
       device_param->use_opencl12 = false;
       device_param->use_opencl20 = false;
-      device_param->use_opencl21 = false;
+      device_param->use_opencl30 = false;
 
       // device_name
 
@@ -6621,9 +6623,10 @@ static void backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UN
       device_param->is_metal  = true;
       device_param->is_opencl = false;
 
+      device_param->use_opencl11 = false;
       device_param->use_opencl12 = false;
       device_param->use_opencl20 = false;
-      device_param->use_opencl21 = false;
+      device_param->use_opencl30 = false;
 
       device_param->is_apple_silicon = is_apple_silicon ();
 
@@ -7029,16 +7032,21 @@ static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, int *vi
 
         // check OpenCL version
 
+        device_param->use_opencl11 = false;
         device_param->use_opencl12 = false;
         device_param->use_opencl20 = false;
-        device_param->use_opencl21 = false;
+        device_param->use_opencl30 = false;
 
         int opencl_version_min = 0;
         int opencl_version_maj = 0;
 
         if (sscanf (opencl_platform_version, "OpenCL %d.%d", &opencl_version_min, &opencl_version_maj) == 2)
         {
-          if ((opencl_version_min == 1) && (opencl_version_maj == 2))
+          if ((opencl_version_min == 1) && (opencl_version_maj == 1))
+          {
+            device_param->use_opencl11 = true;
+          }
+          else if ((opencl_version_min == 1) && (opencl_version_maj == 2))
           {
             device_param->use_opencl12 = true;
           }
@@ -7046,9 +7054,9 @@ static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, int *vi
           {
             device_param->use_opencl20 = true;
           }
-          else if ((opencl_version_min == 2) && (opencl_version_maj == 1))
+          else if ((opencl_version_min == 3) && (opencl_version_maj == 0))
           {
-            device_param->use_opencl21 = true;
+            device_param->use_opencl30 = true;
           }
         }
 
@@ -10433,10 +10441,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1)
+            // For CPU we can ask the runtime
+            // For GPUs we want to be more selective and we will use the tuning db
+
+            vector_width = 1;
+
+            if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
             {
-              device_param->skipped = true;
-              continue;
+              if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1)
+              {
+                device_param->skipped = true;
+
+                continue;
+              }
             }
           }
         }
@@ -10467,11 +10484,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL) == -1)
-            {
-              device_param->skipped = true;
+            // For CPU we can ask the runtime
+            // For GPUs we want to be more selective and we will use the tuning db
 
-              continue;
+            vector_width = 1;
+
+            if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
+            {
+              if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,  sizeof (vector_width), &vector_width, NULL) == -1)
+              {
+                device_param->skipped = true;
+
+                continue;
+              }
             }
           }
         }
@@ -11124,10 +11149,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     /* currently disabled, hangs NEO drivers since 20.09.
        was required for NEO driver 20.08 to workaround the same issue!
        we go with the latest version
+       v7: re-enabled; -cl-std= now follows the detected OpenCL version
+      */
 
     if (device_param->is_opencl == true)
     {
-      if (device_param->use_opencl12 == true)
+      if (device_param->use_opencl11 == true)
+      {
+        build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.1 ");
+      }
+      else if (device_param->use_opencl12 == true)
       {
         build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.2 ");
       }
@@ -11135,12 +11166,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.0 ");
       }
-      else if (device_param->use_opencl21 == true)
+      else if (device_param->use_opencl30 == true)
       {
-        build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.1 ");
+        build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL3.0 ");
       }
     }
-    */
 
     // we don't have sm_* on vendors not NV but it doesn't matter
 
@@ -16200,13 +16230,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       threads_per_block = opencl_query_threads_per_block (hashcat_ctx, device_param, kernel);
 
-      if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == 0) && (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK))
-      {
-        // Intel is highly inaccurate here: https://github.com/hashcat/hashcat/issues/4356
-
-        threads_per_block = MIN (threads_per_block, device_param->kernel_preferred_wgs_multiple);
-      }
-
       // num_regs check should be included in opencl's CL_KERNEL_WORK_GROUP_SIZE
     }
     else if (device_param->is_metal == true)
diff --git a/tunings/Alias.hctune b/tunings/Alias.hctune
index ce737d5c3..daeb33e25 100644
--- a/tunings/Alias.hctune
+++ b/tunings/Alias.hctune
@@ -321,3 +321,23 @@ AMD_Instinct_MI300X                             ALIAS_AMD
 AMD_Instinct_MI325X                             ALIAS_AMD
 AMD_Instinct_MI350X                             ALIAS_AMD
 AMD_Instinct_MI355X                             ALIAS_AMD
+
+Intel(R)_Arc(TM)_A310_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_A380_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_A580_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_A750_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_A770_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_B570_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_B580_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_B770_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_Cxxx_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_Dxxx_Graphics                  ALIAS_INTEL
+Intel(R)_Arc(TM)_Pro_A30M_Graphics              ALIAS_INTEL
+Intel(R)_Arc(TM)_Pro_A40_Graphics               ALIAS_INTEL
+Intel(R)_Arc(TM)_Pro_A50_Graphics               ALIAS_INTEL
+Intel(R)_Arc(TM)_Pro_A60_Graphics               ALIAS_INTEL
+Intel(R)_Arc(TM)_Pro_A60M_Graphics              ALIAS_INTEL
+Intel(R)_Arc(TM)_Pro_B50_Graphics               ALIAS_INTEL
+Intel(R)_Arc(TM)_Pro_B60_Graphics               ALIAS_INTEL
+
+Intel(R)_Data_Center_GPU_Max_1100               ALIAS_INTEL
diff --git a/tunings/Modules_default.hctune b/tunings/Modules_default.hctune
index 7a0e79fce..f5d404a1c 100644
--- a/tunings/Modules_default.hctune
+++ b/tunings/Modules_default.hctune
@@ -147,6 +147,49 @@ ALIAS_AMD                                       3       23700   2       A
 ALIAS_AMD                                       3       24700   2       A       A
 ALIAS_AMD                                       3       99999   8       A       A
 
+ALIAS_INTEL                                     3       0       4       A       A
+ALIAS_INTEL                                     3       10      4       A       A
+ALIAS_INTEL                                     3       11      4       A       A
+ALIAS_INTEL                                     3       12      4       A       A
+ALIAS_INTEL                                     3       20      4       A       A
+ALIAS_INTEL                                     3       21      4       A       A
+ALIAS_INTEL                                     3       22      4       A       A
+ALIAS_INTEL                                     3       23      4       A       A
+ALIAS_INTEL                                     3       24      4       A       A
+ALIAS_INTEL                                     3       200     4       A       A
+ALIAS_INTEL                                     3       500     2       A       A
+ALIAS_INTEL                                     3       900     4       A       A
+ALIAS_INTEL                                     3       1000    4       A       A
+ALIAS_INTEL                                     3       1100    4       A       A
+ALIAS_INTEL                                     3       1600    2       A       A
+ALIAS_INTEL                                     3       2400    4       A       A
+ALIAS_INTEL                                     3       2410    4       A       A
+ALIAS_INTEL                                     3       2611    2       A       A
+ALIAS_INTEL                                     3       2711    2       A       A
+ALIAS_INTEL                                     3       2811    2       A       A
+ALIAS_INTEL                                     3       3710    4       A       A
+ALIAS_INTEL                                     3       3711    4       A       A
+ALIAS_INTEL                                     3       3800    2       A       A
+ALIAS_INTEL                                     3       4010    4       A       A
+ALIAS_INTEL                                     3       4110    2       A       A
+ALIAS_INTEL                                     3       4300    2       A       A
+ALIAS_INTEL                                     3       4800    4       A       A
+ALIAS_INTEL                                     3       4900    2       A       A
+ALIAS_INTEL                                     3       5300    2       A       A
+ALIAS_INTEL                                     3       5400    2       A       A
+ALIAS_INTEL                                     3       5500    4       A       A
+ALIAS_INTEL                                     3       9900    4       A       A
+ALIAS_INTEL                                     3       11100   2       A       A
+ALIAS_INTEL                                     3       13200   2       A       A
+ALIAS_INTEL                                     3       16400   4       A       A
+ALIAS_INTEL                                     3       18700   4       A       A
+ALIAS_INTEL                                     3       21200   4       A       A
+ALIAS_INTEL                                     3       21700   2       A       A
+ALIAS_INTEL                                     3       23600   2       A       A
+ALIAS_INTEL                                     3       23700   2       A       A
+ALIAS_INTEL                                     3       24700   2       A       A
+ALIAS_INTEL                                     3       99999   4       A       A
+
 ##
 ## CryptoAPI
 ##