diff --git a/docs/changes.txt b/docs/changes.txt index ac05e64cb..cac918b76 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -42,6 +42,7 @@ ## - OpenCL Kernels: Add a decompressing kernel and a compressing host code in order to reduce PCIe transfer time +- OpenCL Kernels: Improve performance preview accuracy in --benchmark, --speed-only and --progress-only mode - OpenCL Kernels: Remove password length restriction to 16 for Cisco-PIX and Cisco-ASA hashes - Terminal: Display Set Cost/Rounds During Benchmarking - Terminal: Show [r]esume in prompt only in pause mode, and show [p]ause in prompt only in resume mode diff --git a/include/common.h b/include/common.h index ac5e17dda..28ce201e5 100644 --- a/include/common.h +++ b/include/common.h @@ -110,7 +110,7 @@ but this is nededed for VS compiler which doesn't have inline keyword but has __ #define PARAMCNT 64 #define DEVICES_MAX 128 #define EXEC_CACHE 128 -#define SPEED_CACHE 128 +#define SPEED_CACHE 4096 #define SPEED_MAXAGE 4096 #define EXPECTED_ITERATIONS 10000 diff --git a/src/hashes.c b/src/hashes.c index b074be811..b50a25e72 100644 --- a/src/hashes.c +++ b/src/hashes.c @@ -330,10 +330,11 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 salt_pos) { - cpt_ctx_t *cpt_ctx = hashcat_ctx->cpt_ctx; - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - hashes_t *hashes = hashcat_ctx->hashes; - status_ctx_t *status_ctx = hashcat_ctx->status_ctx; + cpt_ctx_t *cpt_ctx = hashcat_ctx->cpt_ctx; + hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + hashes_t *hashes = hashcat_ctx->hashes; + status_ctx_t *status_ctx = hashcat_ctx->status_ctx; + user_options_t *user_options = hashcat_ctx->user_options; salt_t *salt_buf = &hashes->salts_buf[salt_pos]; @@ -350,6 +351,14 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, return -1; } + // we want the 
hc_clEnqueueReadBuffer to run in benchmark mode because it has an influence on performance + // but sometimes, when a benchmark kernel run cracks a hash, we don't want to see that! + + if (user_options->speed_only == true) + { + return 0; + } + if (num_cracked) { plain_t *cracked = (plain_t *) hccalloc (num_cracked, sizeof (plain_t)); diff --git a/src/opencl.c b/src/opencl.c index d9cf510c8..ca5960e7e 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -1318,7 +1318,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (user_options->speed_only == true) { - if (speed_msec > 4096) return -2; // special RC + if (speed_msec > 4000) return -2; // special RC } } @@ -2460,27 +2460,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (CL_rc == -1) return -1; } - /* - // this writes speed cache, we dont want it - if (user_options->speed_only == true) - { - for (int i = 0; i < 16; i++) - { - const int rc = choose_kernel (hashcat_ctx, device_param, highest_pw_len, pws_cnt, fast_iteration, salt_pos); - - if (rc == -1) return -1; - } - - hc_timer_set (&device_param->timer_speed); - } - */ - const int rc = choose_kernel (hashcat_ctx, device_param, highest_pw_len, pws_cnt, fast_iteration, salt_pos); if (rc == -1) return -1; /** - * benchmark, part1 + * benchmark was aborted because of too long kernel runtime (slow hashes only) */ if (user_options->speed_only == true) @@ -2530,7 +2515,43 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co * benchmark, part2 */ - if (user_options->speed_only == true) break; + if (user_options->speed_only == true) + { + double total = device_param->speed_msec[0]; + + for (u32 speed_pos = 1; speed_pos < device_param->speed_pos; speed_pos++) + { + total += device_param->speed_msec[speed_pos]; + } + + // it's unclear if 4s is enough to turn on boost mode for all OpenCL devices + + if ((total > 4000) || (device_param->speed_pos == SPEED_CACHE - 1)) + { + 
u32 q = device_param->speed_pos / 10; // only use the last 10% of the recording + + if (q == 0) q = 1; + + u64 cnt = 0; + double msec = 0; + + for (u32 speed_pos = device_param->speed_pos - q; speed_pos < device_param->speed_pos; speed_pos++) + { + cnt += device_param->speed_cnt[speed_pos]; + msec += device_param->speed_msec[speed_pos]; + } + + memset (device_param->speed_cnt, 0, SPEED_CACHE * sizeof (u64)); + memset (device_param->speed_msec, 0, SPEED_CACHE * sizeof (double)); + + device_param->speed_cnt[0] = cnt / q; + device_param->speed_msec[0] = msec / q; + + device_param->speed_pos = 1; + + break; + } + } /** * result @@ -6050,6 +6071,12 @@ void opencl_session_reset (hashcat_ctx_t *hashcat_ctx) device_param->words_off = 0; device_param->words_done = 0; + + #if defined (_WIN) + device_param->timer_speed.QuadPart = 0; + #else + device_param->timer_speed.tv_sec = 0; + #endif } opencl_ctx->kernel_power_all = 0;