diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index a7fbea489..963fe7eb4 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -82,7 +82,7 @@ */ #if defined IS_CPU -#define DECLSPEC static +#define DECLSPEC inline #elif defined IS_GPU #if defined IS_AMD #define DECLSPEC inline diff --git a/src/interface.c b/src/interface.c index 819373bd6..b9f9ed00f 100644 --- a/src/interface.c +++ b/src/interface.c @@ -25993,56 +25993,33 @@ u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_p if (forced_kernel_threads) return forced_kernel_threads; - // it can also depends on the opencl device type + // for CPU we just do 1 + + if (device_param->device_type & CL_DEVICE_TYPE_CPU) return 1; + + // this is an upper limit, a good start, since our strategy is to reduce thread counts only u32 kernel_threads = (u32) device_param->device_maxworkgroup_size; // complicated kernel tend to confuse OpenCL runtime suggestions for maximum thread size - // let's workaround that by sticking to their preferred thread size + // let's workaround that by sticking to their device specific preferred thread size if (hashconfig->opts_type & OPTS_TYPE_PREFERED_THREAD) { if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) { - if (device_param->kernel_preferred_wgs_multiple1) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple1); - if (device_param->kernel_preferred_wgs_multiple2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple2); - if (device_param->kernel_preferred_wgs_multiple3) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple3); - if (device_param->kernel_preferred_wgs_multiple4) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple4); - if (device_param->kernel_preferred_wgs_multiple_tm) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple_tm); + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + if (device_param->kernel_preferred_wgs_multiple1) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple1); + } + else + { + if (device_param->kernel_preferred_wgs_multiple4) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple4); + } } else { - if (device_param->kernel_preferred_wgs_multiple1) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple1); - if (device_param->kernel_preferred_wgs_multiple2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple2); - if (device_param->kernel_preferred_wgs_multiple3) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple3); - if (device_param->kernel_preferred_wgs_multiple12) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple12); - if (device_param->kernel_preferred_wgs_multiple23) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple23); - if (device_param->kernel_preferred_wgs_multiple_init2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple_init2); - if (device_param->kernel_preferred_wgs_multiple_loop2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple_loop2); - } - } - - // for CPU we do the same, because some allow up to 8192 thread which seem to be a bit excessive - - if (device_param->device_type & CL_DEVICE_TYPE_CPU) - { - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - if (device_param->kernel_preferred_wgs_multiple1) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple1); - if (device_param->kernel_preferred_wgs_multiple2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple2); - if (device_param->kernel_preferred_wgs_multiple3) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple3); - if (device_param->kernel_preferred_wgs_multiple4) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple4); - if (device_param->kernel_preferred_wgs_multiple_tm) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple_tm); - } - else - { - if (device_param->kernel_preferred_wgs_multiple1) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple1); - if (device_param->kernel_preferred_wgs_multiple2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple2); - if (device_param->kernel_preferred_wgs_multiple3) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple3); - if (device_param->kernel_preferred_wgs_multiple12) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple12); - if (device_param->kernel_preferred_wgs_multiple23) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple23); - if (device_param->kernel_preferred_wgs_multiple_init2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple_init2); - if (device_param->kernel_preferred_wgs_multiple_loop2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple_loop2); + if (device_param->kernel_preferred_wgs_multiple2) kernel_threads = MIN (kernel_threads, device_param->kernel_preferred_wgs_multiple2); } } else @@ -26064,9 +26041,6 @@ u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_p } } - // we'll return a number power of two, makes future processing much more easy - // kernel_threads = power_of_two_floor_32 (kernel_threads); - return kernel_threads; } diff --git a/src/opencl.c b/src/opencl.c index 52b964842..96d0f9032 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -4546,6 +4546,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) snprintf (build_opts_new, sizeof (build_opts_new) - 1, "%s -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, hashconfig->kern_type); #endif + /* if (device_param->device_type & CL_DEVICE_TYPE_CPU) { if (device_param->platform_vendor_id == VENDOR_ID_INTEL_SDK) @@ -4553,6 +4554,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) strncat (build_opts_new, " -cl-opt-disable", 16); } } + */ strncpy (build_opts, build_opts_new, sizeof (build_opts) - 1);