From b2c308d403aa067b23ae511262e34bf6a9c1191f Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 24 Jul 2025 22:19:58 +0200 Subject: [PATCH] Add runtime OpenCL version detection and discrete Intel GPU tuning Detect the highest supported OpenCL version at runtime and use the appropriate -cl-std= flag when compiling kernels. This improves compatibility with the Intel NEO driver. Note: behavior is untested on other platforms (NVIDIA, AMD, Apple, etc.). Feedback will be monitored. Add tuningdb entries for discrete Intel GPUs. Copy over hash-mode patterns that benefit from vectorizing on scalar compute units, based on existing AMD and NVIDIA entries. This change also removes the artificial thread limit previously enforced for discrete Intel GPUs. Disable automatic vector width detection from the OpenCL runtime except on CPU, where it remains in use. --- include/types.h | 3 +- src/backend.c | 73 ++++++++++++++++++++++------------ tunings/Alias.hctune | 20 ++++++++++ tunings/Modules_default.hctune | 43 ++++++++++++++++++++ 4 files changed, 113 insertions(+), 26 deletions(-) diff --git a/include/types.h b/include/types.h index 32a5ae7a4..d4baddbbd 100644 --- a/include/types.h +++ b/include/types.h @@ -1518,9 +1518,10 @@ typedef struct hc_device_param // Some more attributes + bool use_opencl11; bool use_opencl12; bool use_opencl20; - bool use_opencl21; + bool use_opencl30; // AMD bool has_vadd; diff --git a/src/backend.c b/src/backend.c index afdf3cc49..594ab4656 100644 --- a/src/backend.c +++ b/src/backend.c @@ -5634,9 +5634,10 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt device_param->is_metal = false; device_param->is_opencl = false; + device_param->use_opencl11 = false; device_param->use_opencl12 = false; device_param->use_opencl20 = false; - device_param->use_opencl21 = false; + device_param->use_opencl30 = false; // device_name @@ -6111,9 +6112,10 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth device_param->is_metal = false; device_param->is_opencl = false; + device_param->use_opencl11 = false; device_param->use_opencl12 = false; device_param->use_opencl20 = false; - device_param->use_opencl21 = false; + device_param->use_opencl30 = false; // device_name @@ -6621,9 +6623,10 @@ static void backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UN device_param->is_metal = true; device_param->is_opencl = false; + device_param->use_opencl11 = false; device_param->use_opencl12 = false; device_param->use_opencl20 = false; - device_param->use_opencl21 = false; + device_param->use_opencl30 = false; device_param->is_apple_silicon = is_apple_silicon (); @@ -7029,16 +7032,21 @@ static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, int *vi // check OpenCL version + device_param->use_opencl11 = false; device_param->use_opencl12 = false; device_param->use_opencl20 = false; - device_param->use_opencl21 = false; + device_param->use_opencl30 = false; int opencl_version_min = 0; int opencl_version_maj = 0; if (sscanf (opencl_platform_version, "OpenCL %d.%d", &opencl_version_min, &opencl_version_maj) == 2) { - if ((opencl_version_min == 1) && (opencl_version_maj == 2)) + if ((opencl_version_min == 1) && (opencl_version_maj == 1)) + { + device_param->use_opencl11 = true; + } + else if ((opencl_version_min == 1) && (opencl_version_maj == 2)) { device_param->use_opencl12 = true; } @@ -7046,9 +7054,9 @@ static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, int *vi { device_param->use_opencl20 = true; } - else if ((opencl_version_min == 2) && (opencl_version_maj == 1)) + else if ((opencl_version_min == 3) && (opencl_version_maj == 0)) { - device_param->use_opencl21 = true; + device_param->use_opencl30 = true; } } @@ -10433,10 +10441,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (device_param->is_opencl == true) { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) + // For CPU we can ask the runtime + // For GPUs we want to be more selective and we will use the tuning db + + vector_width = 1; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - device_param->skipped = true; - continue; + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) + { + device_param->skipped = true; + + continue; + } } } } @@ -10467,11 +10484,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (device_param->is_opencl == true) { - if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) - { - device_param->skipped = true; + // For CPU we can ask the runtime + // For GPUs we want to be more selective and we will use the tuning db - continue; + vector_width = 1; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) + { + device_param->skipped = true; + + continue; + } } } } @@ -11124,10 +11149,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) /* currently disabled, hangs NEO drivers since 20.09. was required for NEO driver 20.08 to workaround the same issue! we go with the latest version + v7 re-enabled + */ if (device_param->is_opencl == true) { - if (device_param->use_opencl12 == true) + if (device_param->use_opencl11 == true) + { + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.1 "); + } + else if (device_param->use_opencl12 == true) { build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL1.2 "); } @@ -11135,12 +11166,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.0 "); } - else if (device_param->use_opencl21 == true) + else if (device_param->use_opencl30 == true) { - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL2.1 "); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-cl-std=CL3.0 "); } } - */ // we don't have sm_* on vendors not NV but it doesn't matter @@ -16200,13 +16230,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) threads_per_block = opencl_query_threads_per_block (hashcat_ctx, device_param, kernel); - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == 0) && (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)) - { - // Intel is highly inaccurate here: https://github.com/hashcat/hashcat/issues/4356 - - threads_per_block = MIN (threads_per_block, device_param->kernel_preferred_wgs_multiple); - } - // num_regs check should be included in opencl's CL_KERNEL_WORK_GROUP_SIZE } else if (device_param->is_metal == true) diff --git a/tunings/Alias.hctune b/tunings/Alias.hctune index ce737d5c3..daeb33e25 100644 --- a/tunings/Alias.hctune +++ b/tunings/Alias.hctune @@ -321,3 +321,23 @@ AMD_Instinct_MI300X ALIAS_AMD AMD_Instinct_MI325X ALIAS_AMD AMD_Instinct_MI350X ALIAS_AMD AMD_Instinct_MI355X ALIAS_AMD + +Intel(R)_Arc(TM)_A310_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_A380_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_A580_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_A750_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_A770_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_B570_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_B580_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_B770_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Cxxx_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Dxxx_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Pro_A30M_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Pro_A40_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Pro_A50_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Pro_A60_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Pro_A60M_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Pro_B50_Graphics ALIAS_INTEL +Intel(R)_Arc(TM)_Pro_B60_Graphics ALIAS_INTEL + +Intel(R)_Data_Center_GPU_Max_1100 ALIAS_INTEL diff --git a/tunings/Modules_default.hctune b/tunings/Modules_default.hctune index 7a0e79fce..f5d404a1c 100644 --- a/tunings/Modules_default.hctune +++ b/tunings/Modules_default.hctune @@ -147,6 +147,49 @@ ALIAS_AMD 3 23700 2 A ALIAS_AMD 3 24700 2 A A ALIAS_AMD 3 99999 8 A A +ALIAS_INTEL 3 0 4 A A +ALIAS_INTEL 3 10 4 A A +ALIAS_INTEL 3 11 4 A A +ALIAS_INTEL 3 12 4 A A +ALIAS_INTEL 3 20 4 A A +ALIAS_INTEL 3 21 4 A A +ALIAS_INTEL 3 22 4 A A +ALIAS_INTEL 3 23 4 A A +ALIAS_INTEL 3 24 4 A A +ALIAS_INTEL 3 200 4 A A +ALIAS_INTEL 3 500 2 A A +ALIAS_INTEL 3 900 4 A A +ALIAS_INTEL 3 1000 4 A A +ALIAS_INTEL 3 1100 4 A A +ALIAS_INTEL 3 1600 2 A A +ALIAS_INTEL 3 2400 4 A A +ALIAS_INTEL 3 2410 4 A A +ALIAS_INTEL 3 2611 2 A A +ALIAS_INTEL 3 2711 2 A A +ALIAS_INTEL 3 2811 2 A A +ALIAS_INTEL 3 3710 4 A A +ALIAS_INTEL 3 3711 4 A A +ALIAS_INTEL 3 3800 2 A A +ALIAS_INTEL 3 4010 4 A A +ALIAS_INTEL 3 4110 2 A A +ALIAS_INTEL 3 4300 2 A A +ALIAS_INTEL 3 4800 4 A A +ALIAS_INTEL 3 4900 2 A A +ALIAS_INTEL 3 5300 2 A A +ALIAS_INTEL 3 5400 2 A A +ALIAS_INTEL 3 5500 4 A A +ALIAS_INTEL 3 9900 4 A A +ALIAS_INTEL 3 11100 2 A A +ALIAS_INTEL 3 13200 2 A A +ALIAS_INTEL 3 16400 4 A A +ALIAS_INTEL 3 18700 4 A A +ALIAS_INTEL 3 21200 4 A A +ALIAS_INTEL 3 21700 2 A A +ALIAS_INTEL 3 23600 2 A A +ALIAS_INTEL 3 23700 2 A A +ALIAS_INTEL 3 24700 2 A A +ALIAS_INTEL 3 99999 4 A A + ## ## CryptoAPI ##