diff --git a/src/autotune.c b/src/autotune.c
index 4a4dc0d85..7938fb259 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -332,6 +332,22 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   }

   // v7 autotuner is a lot more straightforward
+  // we start with some purely theoretical values as a base, then move on to some measured tests
+
+  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+  {
+    if (kernel_accel_min < kernel_accel_max)
+    {
+      // let's also do some minimal accel; this is only to improve early measurements taken with try_run()
+
+      const u32 kernel_accel_start = previous_power_of_two (kernel_accel_max / 8);
+
+      if ((kernel_accel_start >= kernel_accel_min) && (kernel_accel_start <= kernel_accel_max))
+      {
+        kernel_accel = kernel_accel_start;
+      }
+    }
+  }

   if (kernel_threads_min < kernel_threads_max)
   {
@@ -348,24 +364,42 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
     }
   }

-  if (hashes && hashes->st_salts_buf)
+  if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
   {
-    u32 start = kernel_loops_max;
-
-    const u32 salt_iter = hashes->st_salts_buf->salt_iter;
-
-    if (salt_iter)
+    if (hashes && hashes->salts_buf)
     {
-      start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter));
-      start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter + 1));
+      u32 start = kernel_loops_max;

-      if (((hashes->st_salts_buf->salt_iter + 0) % 125) == 0) start = MIN (start, 125);
-      if (((hashes->st_salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
+      const u32 salt_iter = hashes->salts_buf->salt_iter; // we use the first salt as reference

-      if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+      if (salt_iter)
       {
-        kernel_loops = start;
+        start = MIN (start, smallest_repeat_double (hashes->salts_buf->salt_iter));
+        start = MIN (start, smallest_repeat_double (hashes->salts_buf->salt_iter + 1));
+
+        if (((hashes->salts_buf->salt_iter + 0) % 125) == 0) start = MIN (start, 125);
+        if (((hashes->salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
+
+        if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+        {
+          kernel_loops = start;
+        }
       }
+      else
+      {
+        // how can there be a slow hash with no iterations?
+      }
+    }
+  }
+  else
+  {
+    // let's also do some minimal loops; this is only to improve early measurements taken with try_run()
+
+    const u32 kernel_loops_start = previous_power_of_two (kernel_loops_max / 4);
+
+    if ((kernel_loops_start >= kernel_loops_min) && (kernel_loops_start <= kernel_loops_max))
+    {
+      kernel_loops = kernel_loops_start;
     }
   }

@@ -396,30 +430,45 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
   double exec_msec_init = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2);

   float threads_eff_best = exec_msec_init / kernel_threads;
+  u32   threads_cnt_best = kernel_threads;
+
+  float threads_eff_prev = 0;
+  u32   threads_cnt_prev = 0;

   for (u32 kernel_threads_test = kernel_threads; kernel_threads_test <= kernel_threads_max; kernel_threads_test = (kernel_threads_test < device_param->kernel_preferred_wgs_multiple) ? kernel_threads_test << 1 : kernel_threads_test + device_param->kernel_preferred_wgs_multiple)
   {
     double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads_test, 2);

+    //printf ("thread %f %u %u %u\n", exec_msec, kernel_accel, kernel_loops, kernel_threads_test);
+
     if (exec_msec > target_msec) break;

     if (kernel_threads >= 32)
     {
       // we want a little room for accel to play with so not full target_msec
-      if (exec_msec > target_msec / 8) break;
+      if (exec_msec > target_msec / 4) break;
     }

-    float threads_eff_cur = exec_msec / kernel_threads_test;
+    kernel_threads = kernel_threads_test;

-    if ((threads_eff_cur * 1.05) < threads_eff_best)
+    threads_eff_prev = exec_msec / kernel_threads_test;
+    threads_cnt_prev = kernel_threads_test;
+
+    //printf ("%f\n", threads_eff_prev);
+
+    if (threads_eff_prev < threads_eff_best)
     {
-      threads_eff_best = threads_eff_cur;
-
-      kernel_threads = kernel_threads_test;
+      threads_eff_best = threads_eff_prev;
+      threads_cnt_best = threads_cnt_prev;
     }
   }

+  // now we decide: choose the maximum, or in some extreme cases prefer the more efficient thread count
+  if ((threads_eff_best * 1.06) < threads_eff_prev)
+  {
+    kernel_threads = threads_cnt_best;
+  }
+
   #define STEPS_CNT 12

   // now we tune for kernel-accel but with the new kernel-loops from previous loop set
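The autotune.c hunks above do two things: they seed kernel_accel and kernel_loops with theoretical starting values (previous_power_of_two of a fraction of the respective maximum) so the first try_run() measurements start in a sane range, and they rework the thread tuner to keep the highest thread count that fits the time budget unless a smaller count measured more than 6% better efficiency (milliseconds per thread). Below is a minimal standalone sketch of that selection rule, not hashcat code: measure_msec() is a hypothetical stand-in for try_run_times(), and the plain doubling step simplifies the kernel_preferred_wgs_multiple stepping of the real loop.

#include <stdio.h>

typedef unsigned int u32;

// hypothetical stand-in for try_run_times(): pretend runtime grows
// sub-linearly with the thread count
static double measure_msec (const u32 threads)
{
  return 10.0 + (threads * 0.01);
}

static u32 select_threads (const u32 threads_min, const u32 threads_max)
{
  u32    threads_sel = threads_min;
  double eff_best    = measure_msec (threads_min) / threads_min;
  u32    cnt_best    = threads_min;
  double eff_prev    = eff_best;

  for (u32 t = threads_min; t <= threads_max; t <<= 1)
  {
    const double exec_msec = measure_msec (t);

    threads_sel = t; // default: take the largest count that fits

    eff_prev = exec_msec / t; // efficiency metric: msec per thread, lower is better

    if (eff_prev < eff_best)
    {
      eff_best = eff_prev;
      cnt_best = t;
    }
  }

  // fall back from the maximum only if the best candidate was >6% more efficient
  if ((eff_best * 1.06) < eff_prev) threads_sel = cnt_best;

  return threads_sel;
}

int main (void)
{
  printf ("selected threads: %u\n", select_threads (32, 1024));

  return 0;
}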
diff --git a/src/backend.c b/src/backend.c
index 789dcd7df..6e8fe4aad 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -10217,10 +10217,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
   {
     const u64 device_available_mem_sav = device_param->device_available_mem;

-    const u64 device_available_mem_new = device_available_mem_sav - (device_available_mem_sav * 0.2);
+    const u64 device_available_mem_new = device_available_mem_sav - (device_available_mem_sav * 0.34);

     event_log_warning (hashcat_ctx, "* Device #%u: This system does not offer any reliable method to query actual free memory. Estimated base: %" PRIu64, device_id + 1, device_available_mem_sav);
-    event_log_warning (hashcat_ctx, "             Assuming normal desktop activity, reducing estimate by 20%%: %" PRIu64, device_available_mem_new);
+    event_log_warning (hashcat_ctx, "             Assuming normal desktop activity, reducing estimate by 34%%: %" PRIu64, device_available_mem_new);
     event_log_warning (hashcat_ctx, "             This can hurt performance drastically, especially on memory-heavy algorithms.");
     event_log_warning (hashcat_ctx, "             You can adjust this percentage using --backend-devices-keepfree");
     event_log_warning (hashcat_ctx, NULL);
@@ -16275,12 +16275,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
   {
     const u64 GiB4 = 4ULL * 1024 * 1024 * 1024;

-    event_log_warning (hashcat_ctx, "Couldn't query the OS for free memory, assuming 4GiB");
+    event_log_warning (hashcat_ctx, "Couldn't query the OS for free memory, assuming 4GiB is available per compute device");

     accel_limit_host = GiB4;
   }
   else
   {
+    if (user_options->backend_devices_keepfree)
+    {
+      accel_limit_host = ((u64) accel_limit_host * (100 - user_options->backend_devices_keepfree)) / 100;
+    }
+    else
+    {
+      accel_limit_host = accel_limit_host - (accel_limit_host * 0.34);
+    }
+
+    accel_limit_host /= backend_ctx->backend_devices_active;
+
     // even though, let's not be greedy

     const u64 GiB8 = 8ULL * 1024 * 1024 * 1024;
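The backend.c changes raise the default reserve on the host-memory estimate from 20% to 34%, apply --backend-devices-keepfree to the host-side accel limit as well, and split the remaining budget across all active devices instead of letting each device assume the full amount. A rough sketch of that arithmetic, with illustrative names rather than hashcat's API:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

// hypothetical helper mirroring the patched block: honor a user-supplied
// keepfree percentage if given, otherwise reserve the default 34%, then
// give every active device an equal share of what remains
static uint64_t host_budget_per_device (const uint64_t free_mem, const uint32_t keepfree_percent, const uint32_t devices_active)
{
  uint64_t limit = free_mem;

  if (keepfree_percent)
  {
    limit = (limit * (100 - keepfree_percent)) / 100;
  }
  else
  {
    limit = limit - (uint64_t) (limit * 0.34);
  }

  return limit / devices_active;
}

int main (void)
{
  const uint64_t GiB = 1024ULL * 1024 * 1024;

  // 64 GiB free, default reserve, 2 active devices -> ~21 GiB per device
  printf ("%" PRIu64 " MiB per device\n", host_budget_per_device (64 * GiB, 0, 2) / (1024 * 1024));

  return 0;
}

With --backend-devices-keepfree set, the reserve is exact integer math; the default path keeps the fractional 34% estimate used in the patch.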
diff --git a/src/main.c b/src/main.c
index 994b0bc9f..69d89e81b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -643,7 +643,17 @@ static void main_backend_session_hostmem (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx
   const u64 *hostmem = (const u64 *) buf;

-  event_log_info (hashcat_ctx, "Host memory required for this attack: %" PRIu64 " MB", *hostmem / (1024 * 1024));
+  u64 free_memory = 0;
+
+  if (get_free_memory (&free_memory) == false)
+  {
+    event_log_info (hashcat_ctx, "Host memory allocated for this attack: %" PRIu64 " MB", *hostmem / (1024 * 1024));
+  }
+  else
+  {
+    event_log_info (hashcat_ctx, "Host memory allocated for this attack: %" PRIu64 " MB (%" PRIu64 " MB free)", *hostmem / (1024 * 1024), free_memory / (1024 * 1024));
+  }
+
   event_log_info (hashcat_ctx, NULL);
 }
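The main.c change rewords the message from memory "required" to "allocated" and, when the OS can report it, appends how much host memory is still free. A compact sketch of the same fallback pattern; the get_free_memory() stub below only imitates the signature of hashcat's helper and always reports failure:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

// stub with the same shape as hashcat's get_free_memory(); pretend the
// OS query is unavailable so the plain message path is taken
static bool get_free_memory (uint64_t *free_mem)
{
  *free_mem = 0;

  return false;
}

static void report_hostmem (const uint64_t hostmem)
{
  uint64_t free_memory = 0;

  if (get_free_memory (&free_memory) == false)
  {
    printf ("Host memory allocated for this attack: %" PRIu64 " MB\n", hostmem / (1024 * 1024));
  }
  else
  {
    printf ("Host memory allocated for this attack: %" PRIu64 " MB (%" PRIu64 " MB free)\n", hostmem / (1024 * 1024), free_memory / (1024 * 1024));
  }
}

int main (void)
{
  report_hostmem (512ULL * 1024 * 1024); // reports "512 MB"

  return 0;
}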