diff --git a/src/autotune.c b/src/autotune.c index ca62bc957..cfde1abed 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -95,7 +95,11 @@ static double try_run_times (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi return exec_msec_best; } -/* +static bool is_power_of_2 (const u32 n) +{ + return n != 0 && (n & (n - 1)) == 0; +} + static u32 previous_power_of_two (const u32 x) { // https://stackoverflow.com/questions/2679815/previous-power-of-2 @@ -113,7 +117,23 @@ static u32 previous_power_of_two (const u32 x) return r - (r >> 1); } -*/ + +static u32 next_power_of_two (const u32 x) +{ + if (x == 0) return 1; + + u32 r = x - 1; + + r |= (r >> 1); + r |= (r >> 2); + r |= (r >> 4); + r |= (r >> 8); + r |= (r >> 16); + + r++; + + return r; +} static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) { @@ -416,6 +436,89 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (kernel_accel > 64) kernel_accel -= kernel_accel % 32; } + // some final play, if we have strange numbers from the APIs, namely 96, 384, and such + + if (is_power_of_2 (kernel_threads) == false) + { + u32 fun[2]; + + fun[0] = previous_power_of_two (kernel_threads); + fun[1] = next_power_of_two (kernel_threads); + + float fact[2]; + + fact[0] = (float) kernel_threads / fun[0]; + fact[1] = (float) kernel_threads / fun[1]; + + float ms_prev = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2); + + float res[2]; + + for (int i = 0; i < 2; i++) + { + const float ms = try_run_times (hashcat_ctx, device_param, kernel_accel * fact[i], kernel_loops, fun[i], 2); + + res[i] = ms_prev / ms; + } + + const int sel = (res[0] > res[1]) ? 0 : 1; + + if (res[sel] > 1.01) + { + const u32 kernel_accel_new = kernel_accel * fact[sel]; + const u32 kernel_threads_new = fun[sel]; + + if ((kernel_accel_new >= kernel_accel_min) && (kernel_accel_new <= kernel_accel_max)) + { + // we can't check kernel_threads because that is for sure outside the range + + kernel_accel = kernel_accel_new; + kernel_threads = kernel_threads_new; + } + } + } + else + { + // that's also nice + + u32 fun[2]; + + fun[0] = kernel_threads >> 1; + fun[1] = kernel_threads << 1; + + float fact[2]; + + fact[0] = (float) kernel_threads / fun[0]; + fact[1] = (float) kernel_threads / fun[1]; + + float ms_prev = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2); + + float res[2]; + + for (int i = 0; i < 2; i++) + { + const float ms = try_run_times (hashcat_ctx, device_param, kernel_accel * fact[i], kernel_loops, fun[i], 2); + + res[i] = ms_prev / ms; + } + + const int sel = (res[0] > res[1]) ? 0 : 1; + + if (res[sel] > 1.01) + { + const u32 kernel_accel_new = kernel_accel * fact[sel]; + const u32 kernel_threads_new = fun[sel]; + + if ((kernel_accel_new >= kernel_accel_min) && (kernel_accel_new <= kernel_accel_max)) + { + // we can't check kernel_threads because that is for sure outside the range + + kernel_accel = kernel_accel_new; + kernel_threads = kernel_threads_new; + } + } + } + // reset them fake words // reset other buffers in case autotune cracked something diff --git a/src/shared.c b/src/shared.c index f790d09fa..a7d8dca4e 100644 --- a/src/shared.c +++ b/src/shared.c @@ -21,6 +21,7 @@ #endif #if defined (_WIN) +#include #include #endif