diff --git a/include/shared.h b/include/shared.h
index ae968281c..414f14e61 100644
--- a/include/shared.h
+++ b/include/shared.h
@@ -33,6 +33,7 @@ bool overflow_check_u64_add (const u64 a, const u64 b);
 bool overflow_check_u64_mul (const u64 a, const u64 b);
 
 bool is_power_of_2 (const u32 v);
+u32 smallest_repeat_double (const u32 v);
 
 u32 get_random_num (const u32 min, const u32 max);
 
diff --git a/include/types.h b/include/types.h
index cd3331234..4badfefa3 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1375,6 +1375,8 @@ typedef struct hc_device_param
   u32 kernel_threads_min;
   u32 kernel_threads_max;
 
+  bool overtune_unfriendly; // set by code that pinned or capped kernel threads/loops: tells the autotuner it must not overtune threads_max or accel_max
+
   u64 kernel_power;
   u64 hardware_power;
 
diff --git a/src/autotune.c b/src/autotune.c
index b33ed36d6..df6d5529f 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -98,6 +98,7 @@ static double try_run_times (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
 
 static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
+  const hashes_t *hashes = hashcat_ctx->hashes;
   const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
   const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
   const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx;
@@ -329,7 +330,28 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   // v7 autotuner is a lot more straight forward
 
-  for (u32 kernel_loops_test = kernel_loops_min; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
+  u32 kernel_loops_min_start = kernel_loops_min;
+
+  // derive the first kernel_loops candidate from salt_iter of hashes->st_salts_buf:
+  // the smallest value that reaches salt_iter (or salt_iter + 1) by repeated doubling,
+  // or 125 if either is a multiple of 125. only used if within [kernel_loops_min, kernel_loops_max]
+  if (hashes && hashes->st_salts_buf)
+  {
+    u32 start = kernel_loops_max;
+
+    start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter));
+    start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter + 1));
+
+    if ((hashes->st_salts_buf->salt_iter % 125) == 0) start = MIN (start, 125);
+    if (((hashes->st_salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
+
+    if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+    {
+      kernel_loops_min_start = start;
+    }
+  }
+
+  for (u32 kernel_loops_test = kernel_loops_min_start; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
   {
     double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_test, kernel_threads_min, 2);
 
@@ -401,20 +423,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (kernel_accel > kernel_accel_max) kernel_accel = kernel_accel_max;
   }
 
-  if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
+  // overtune section. relevant if we have strange numbers from the APIs, namely 96, 384, and such
+  // this is a dangerous action; backend_session_begin () sets device_param->overtune_unfriendly to disable it
 
-  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
-  {
-    if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
-  }
-
-  // some final play, if we have strange numbers from the APIs, namely 96, 384, and such
-
-  if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max))
+  if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max) || (device_param->overtune_unfriendly == true))
   {
   }
   else
   {
+    if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
+
+    if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
+    {
+      if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
+    }
+
     u32 fun[2];
 
     if (is_power_of_2 (kernel_threads) == false)
diff --git a/src/backend.c b/src/backend.c
index 6bb511c5b..15e4badbb 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -10532,6 +10532,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
         device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, 1024); // autotune go over ...
         device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, device_param->kernel_loops_max);
+
+        device_param->overtune_unfriendly = true;
       }
       #endif
 
@@ -11499,6 +11501,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->kernel_threads_min = fixed_local_size;
             // device_param->kernel_threads_max = fixed_local_size;
+
+            device_param->overtune_unfriendly = true;
           }
         }
       }
@@ -16014,6 +16018,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
             device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
+
+            device_param->overtune_unfriendly = true;
           }
         }
       }
@@ -16032,6 +16038,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
       device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
+
+      device_param->overtune_unfriendly = true;
     }
 
     // device_param->kernel_threads = kernel_threads;
diff --git a/src/ext_hip.c b/src/ext_hip.c
index c0145504b..decfa987c 100644
--- a/src/ext_hip.c
+++ b/src/ext_hip.c
@@ -133,8 +133,8 @@ int hip_init (void *hashcat_ctx)
   HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipDrvGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipDrvGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1);
diff --git a/src/shared.c b/src/shared.c
index 25bbf5005..a00025352 100644
--- a/src/shared.c
+++ b/src/shared.c
@@ -206,6 +206,17 @@ bool is_power_of_2 (const u32 v)
   return (v && !(v & (v - 1)));
 }
 
+// divide v by the largest power of two that divides it, i.e. strip all trailing
+// zero bits (12 -> 3, 1000 -> 125, 1024 -> 1). the result is the smallest value
+// that reaches v again by repeated doubling.
+// v == 0 has no lowest set bit; return 0 instead of dividing by zero.
+u32 smallest_repeat_double (const u32 v)
+{
+  if (v == 0) return 0;
+
+  return (v / (v & -v));
+}
+
 u32 mydivc32 (const u32 dividend, const u32 divisor)
 {
   u32 quotient = dividend / divisor;