1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-07-24 15:38:47 +00:00

Trying out a tweak to autotune behavior related to -u loop tuning.

Since loop values increase by doubling in autotune, a slow hash-mode
with, for example, 1000 iterations can end up with a suboptimal -u count.
Currently, autotuning starts at 1 and doubles (2, 4, 8, ..., 512, 1024).
If the maximum is 1000, autotune stops at 512, resulting in two kernel
calls: one with 512 iterations and another with 488.

The tweak attempts to find the smallest factor that, when repeatedly
doubled, reaches the target exactly.  For 1000, this would be 125
and for 1024, it would be 1.

However, this logic doesn’t align well with how hashcat handles slow
hash iterations. For instance, PBKDF2-based plugins typically set the
iteration count to N-1, since the first iteration is handled by the
`_init` kernel. So, a plugin might set 1023 instead of 1024. Because 1023
is odd, it cannot be halved, so the logic would treat 1023 itself as the
minimum factor, which leads to suboptimal tuning.

To work around this, the factor-finder is executed twice: once with
the original iteration count and once with `iteration count + 1`.
The configuration that results in a lower starting point is used.

Other stuff:

- Fixed a critical bug in the autotuner

This bug was introduced a few days ago. Under certain conditions, the
autotuner is able to overtune beyond the maximum allowed thread count,
for example in unoptimized -a 0 cracking mode when using rules.
Several parts of the hashcat core require strict adherence to this limit,
especially when shared memory is involved.
To resolve this while retaining overtuning for compatible modes,
a new attribute `device_param->overtune_unfriendly` was introduced.
When set to true, it prevents the autotuner from modifying
`kernel_threads_max` and `kernel_accel_max`.
Four sections in `backend.c` have been updated to set this flag,
though additional areas may also require it.

- Moved the code that aligns `kernel_accel` to a multiple of the compute
  unit count into the overtune section.

- Fixed a bug in the HIP dynloader. It now reports actual error strings,
  provided the API returns them.
This commit is contained in:
Jens Steube 2025-06-27 21:52:57 +02:00
parent bdc47abbe0
commit 974934dcdf
6 changed files with 48 additions and 12 deletions

View File

@ -33,6 +33,7 @@ bool overflow_check_u64_add (const u64 a, const u64 b);
bool overflow_check_u64_mul (const u64 a, const u64 b);
bool is_power_of_2 (const u32 v);
u32 smallest_repeat_double (const u32 v);
u32 get_random_num (const u32 min, const u32 max);

View File

@ -1375,6 +1375,8 @@ typedef struct hc_device_param
u32 kernel_threads_min;
u32 kernel_threads_max;
bool overtune_unfriendly; // whatever sets this decide we operate in a mode that is not allowing to overtune threads_max or accel_max in autotuner
u64 kernel_power;
u64 hardware_power;

View File

@ -98,6 +98,7 @@ static double try_run_times (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
{
const hashes_t *hashes = hashcat_ctx->hashes;
const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx;
@ -329,7 +330,25 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
// v7 autotuner is a lot more straight forward
for (u32 kernel_loops_test = kernel_loops_min; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
// Pick the loop count the doubling search below starts from. Starting at a
// value that reaches the hash's iteration count by repeated doubling avoids
// ending on a step that leaves an odd-sized remainder for a second kernel call.
u32 kernel_loops_min_start = kernel_loops_min;

if (hashes && hashes->st_salts_buf)
{
  // Two candidates: the iteration count as stored, and the count + 1
  // (plugins may store N-1 because the first iteration runs in the _init kernel).
  const u32 salt_iter      = hashes->st_salts_buf->salt_iter;
  const u32 salt_iter_next = salt_iter + 1; // wraps to 0 when salt_iter is the u32 maximum

  u32 start = kernel_loops_max;

  // Skip zero candidates: zero has no odd part to start doubling from.
  if (salt_iter      != 0) start = MIN (start, smallest_repeat_double (salt_iter));
  if (salt_iter_next != 0) start = MIN (start, smallest_repeat_double (salt_iter_next));

  // Parenthesize the addition: `salt_iter + 1 % 125` parses as `salt_iter + (1 % 125)`,
  // which would only ever test whether salt_iter + 1 is zero.
  if ((salt_iter      % 125) == 0) start = MIN (start, 125);
  if ((salt_iter_next % 125) == 0) start = MIN (start, 125);

  // Only accept a start value that lies inside the allowed loop range;
  // otherwise keep the default of kernel_loops_min.
  if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
  {
    kernel_loops_min_start = start;
  }
}
for (u32 kernel_loops_test = kernel_loops_min_start; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
{
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_test, kernel_threads_min, 2);
@ -401,20 +420,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (kernel_accel > kernel_accel_max) kernel_accel = kernel_accel_max;
}
if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
// overtune section. relevant if we have strange numbers from the APIs, namely 96, 384, and such
// this is a dangerous action, and we set conditions somewhere in the code to disable this
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
}
// some final play, if we have strange numbers from the APIs, namely 96, 384, and such
if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max))
if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max) || (device_param->overtune_unfriendly == true))
{
}
else
{
if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
}
u32 fun[2];
if (is_power_of_2 (kernel_threads) == false)

View File

@ -10532,6 +10532,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, 1024); // autotune go over ...
device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, device_param->kernel_loops_max);
device_param->overtune_unfriendly = true;
}
#endif
@ -11499,6 +11501,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
device_param->kernel_threads_min = fixed_local_size;
// device_param->kernel_threads_max = fixed_local_size;
device_param->overtune_unfriendly = true;
}
}
}
@ -16014,6 +16018,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
device_param->overtune_unfriendly = true;
}
}
}
@ -16032,6 +16038,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
device_param->overtune_unfriendly = true;
}
// device_param->kernel_threads = kernel_threads;

View File

@ -133,8 +133,8 @@ int hip_init (void *hashcat_ctx)
HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipDrvGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipDrvGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1);

View File

@ -206,6 +206,11 @@ bool is_power_of_2 (const u32 v)
return (v && !(v & (v - 1)));
}
/**
 * Return the smallest value that reaches v exactly when repeatedly doubled,
 * i.e. v with every factor of two divided out (its odd part).
 *
 * Examples: 1000 -> 125, 1024 -> 1, 1023 -> 1023.
 *
 * @param v  target value
 * @return   odd part of v; 0 when v is 0 (no such factor exists)
 */
u32 smallest_repeat_double (const u32 v)
{
  // For v == 0 the lowest-set-bit mask below is 0 and the division would be
  // a division by zero, so answer early.
  if (v == 0) return 0;

  // v & (0 - v) isolates the lowest set bit, which is the largest power of
  // two dividing v.
  const u32 lowest_set_bit = v & (0u - v);

  return v / lowest_set_bit;
}
u32 mydivc32 (const u32 dividend, const u32 divisor)
{
u32 quotient = dividend / divisor;