From edf7365cdabd0c9b0ee4d9c29cc56c78d11720ab Mon Sep 17 00:00:00 2001 From: Will Crozier Date: Tue, 8 Feb 2022 06:50:51 +0000 Subject: [PATCH] stdout mode: transfer indexes and compressed pw buffer from device in blocks, reducing overhead --- include/backend.h | 3 + src/backend.c | 80 +++++++++++++++ src/stdout.c | 254 ++++++++++++++++++++++++---------------------- 3 files changed, 216 insertions(+), 121 deletions(-) diff --git a/include/backend.h b/include/backend.h index 2f2a58f9c..7bc499e44 100644 --- a/include/backend.h +++ b/include/backend.h @@ -50,6 +50,9 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw); +int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u64 gidd, const u64 cnt, pw_idx_t *dest); +int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 off, u32 cnt, u32 *dest); + int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos); int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num); diff --git a/src/backend.c b/src/backend.c index a6dfbcdb8..9da4ede0e 100644 --- a/src/backend.c +++ b/src/backend.c @@ -860,6 +860,86 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c return 0; } +int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u64 gidd, const u64 cnt, pw_idx_t *dest) /* Copies cnt pw_idx_t entries, starting at entry index gidd, from the backend's d_pws_idx buffer into dest. Each backend flag (is_cuda, is_hip, is_metal, is_opencl) is tested with its own independent if. Returns 0 on success, or -1 as soon as any backend wrapper call reports -1. NOTE(review): in the CUDA and HIP branches every failure return after the context push skips the matching context pop; confirm callers tolerate that. */ +{ + if (device_param->is_cuda == true) /* CUDA: push context, async device-to-host copy on cuda_stream, synchronize the stream, pop context */ + { + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1; + + if (hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->cuda_stream) == -1) return -1; /* source address is advanced by gidd entries expressed in bytes; length is cnt entries in bytes */ + + if (hc_cuStreamSynchronize 
(hashcat_ctx, device_param->cuda_stream) == -1) return -1; + + if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1; + } + + if (device_param->is_hip == true) /* HIP: same push / async copy / synchronize / pop sequence as the CUDA branch, using the hip_* handles */ + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1; + + if (hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->hip_stream) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1; + } + + #if defined (__APPLE__) /* the Metal branch is compiled only when __APPLE__ is defined */ + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, dest, device_param->metal_d_pws_idx, gidd * sizeof (pw_idx_t), (cnt * sizeof (pw_idx_t))) == -1) return -1; /* here the byte offset is passed as a separate argument instead of being added to the buffer handle */ + } + #endif + + if (device_param->is_opencl == true) + { + /* blocking read: CL_TRUE is passed as the blocking flag, with byte offset and byte length derived from gidd and cnt */ + if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), (cnt * sizeof (pw_idx_t)), dest, 0, NULL, NULL) == -1) return -1; + } + + return 0; +} + +int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 off, u32 cnt, u32 *dest) /* Copies cnt u32 words, starting at word index off, from the backend's d_pws_comp_buf buffer into dest. Each backend flag (is_cuda, is_hip, is_metal, is_opencl) is tested with its own independent if. Returns 0 on success, or -1 as soon as any backend wrapper call reports -1. NOTE(review): in the CUDA and HIP branches every failure return after the context push skips the matching context pop; confirm callers tolerate that. */ +{ + if (device_param->is_cuda == true) /* CUDA: push context, async device-to-host copy on cuda_stream, synchronize the stream, pop context */ + { + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1; + + if (hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->cuda_stream) == -1) return -1; /* source address is advanced by off words expressed in bytes; length is cnt words in bytes */ + + if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; + + if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1; + } + + if (device_param->is_hip == true) /* HIP: same push / async copy / synchronize / pop sequence as the CUDA branch, using the hip_* handles */ + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1; + + if (hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, 
device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->hip_stream) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1; + } + + #if defined (__APPLE__) /* the Metal branch is compiled only when __APPLE__ is defined */ + if (device_param->is_metal == true) + { + if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, dest, device_param->metal_d_pws_comp_buf, off * sizeof (u32), cnt * sizeof (u32)) == -1) return -1; /* here the byte offset is passed as a separate argument instead of being added to the buffer handle */ + } + #endif + + if (device_param->is_opencl == true) + { + /* blocking read: CL_TRUE is passed as the blocking flag, with byte offset and byte length derived from off and cnt */ + if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), dest, 0, NULL, NULL) == -1) return -1; + } + + return 0; +} + int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos) { hashconfig_t *hashconfig = hashcat_ctx->hashconfig; diff --git a/src/stdout.c b/src/stdout.c index 343009a3b..b9e11c6a6 100644 --- a/src/stdout.c +++ b/src/stdout.c @@ -104,87 +104,51 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 il_cnt = device_param->kernel_param.il_cnt; // ugly, i know - if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION)) - { - pw_t pw; + int rc = 0; + if (user_options->attack_mode == ATTACK_MODE_BF) + { for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++) { - const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw); - - if (rc == -1) - { - if (filename) hc_fclose (&out.fp); - - return -1; - } - for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { - const u32 off = device_param->innerloop_pos + il_pos; + u64 l_off = device_param->kernel_params_mp_l_buf64[3] + gidvid; + u64 r_off = 
device_param->kernel_params_mp_r_buf64[3] + il_pos; - if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) - { - for (int i = 0; i < 8; i++) - { - plain_buf[i] = pw.i[i]; - } + u32 l_start = device_param->kernel_params_mp_l_buf32[5]; + u32 r_start = device_param->kernel_params_mp_r_buf32[5]; - plain_len = apply_rules_optimized (straight_ctx->kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], pw.pw_len); - } - else - { - for (int i = 0; i < 64; i++) - { - plain_buf[i] = pw.i[i]; - } + u32 l_stop = device_param->kernel_params_mp_l_buf32[4]; + u32 r_stop = device_param->kernel_params_mp_r_buf32[4]; - plain_len = apply_rules (straight_ctx->kernel_rules_buf[off].cmds, plain_buf, pw.pw_len); - } + sp_exec (l_off, (char *) plain_ptr + l_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, l_start, l_start + l_stop); + sp_exec (r_off, (char *) plain_ptr + r_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, r_start, r_start + r_stop); - if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max; + plain_len = mask_ctx->css_cnt; out_push (&out, plain_ptr, plain_len); } } } - else if (user_options->attack_mode == ATTACK_MODE_COMBI) + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) { - pw_t pw; - for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++) { - const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw); - - if (rc == -1) + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { - if (filename) hc_fclose (&out.fp); + u64 off = device_param->kernel_params_mp_buf64[3] + gidvid; - return -1; - } + u32 start = 0; + u32 stop = device_param->kernel_params_mp_buf32[4]; - for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) - { - for (int i = 0; i < 64; i++) - { - plain_buf[i] = pw.i[i]; - } + sp_exec (off, (char *) plain_ptr, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop); - plain_len = pw.pw_len; + plain_len = stop; char *comb_buf = (char *) device_param->combs_buf[il_pos].i; u32 comb_len = 
device_param->combs_buf[il_pos].pw_len; - if (combinator_ctx->combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - memcpy (plain_ptr + plain_len, comb_buf, comb_len); - } - else - { - memmove (plain_ptr + comb_len, plain_ptr, plain_len); - - memcpy (plain_ptr, comb_buf, comb_len); - } + memcpy (plain_ptr + plain_len, comb_buf, comb_len); plain_len += comb_len; @@ -194,104 +158,152 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, } } } - else if (user_options->attack_mode == ATTACK_MODE_BF) + else { - for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++) + // modes below require transferring pw index/buffer data from device to host + + const u64 blk_cnt_max = device_param->size_pws_idx / (sizeof (pw_idx_t)); + + pw_idx_t *const pws_idx_blk = device_param->pws_idx; + u32 *const pws_comp_blk = device_param->pws_comp; + + u64 gidvid_blk = 0; // gidvid of first password in current block + + while (gidvid_blk < pws_cnt) { - for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) - { - u64 l_off = device_param->kernel_params_mp_l_buf64[3] + gidvid; - u64 r_off = device_param->kernel_params_mp_r_buf64[3] + il_pos; + // copy the pw indexes from device for this block - u32 l_start = device_param->kernel_params_mp_l_buf32[5]; - u32 r_start = device_param->kernel_params_mp_r_buf32[5]; + u64 remain = pws_cnt - gidvid_blk; + u64 blk_cnt = MIN (remain, blk_cnt_max); - u32 l_stop = device_param->kernel_params_mp_l_buf32[4]; - u32 r_stop = device_param->kernel_params_mp_r_buf32[4]; + rc = copy_pws_idx (hashcat_ctx, device_param, gidvid_blk, blk_cnt, pws_idx_blk); - sp_exec (l_off, (char *) plain_ptr + l_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, l_start, l_start + l_stop); - sp_exec (r_off, (char *) plain_ptr + r_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, r_start, r_start + r_stop); + if (rc == -1) break; - plain_len = mask_ctx->css_cnt; + const u32 off_blk = (blk_cnt > 0) ? 
pws_idx_blk[0].off : 0; - out_push (&out, plain_ptr, plain_len); - } - } - } - else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) - { - pw_t pw; + const pw_idx_t *pw_idx = device_param->pws_idx; + const pw_idx_t *pw_idx_last = pw_idx + (blk_cnt - 1); - for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++) - { - const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw); + // copy the pw buffer data from device for this block - if (rc == -1) - { - if (filename) hc_fclose (&out.fp); + u32 copy_cnt = (pw_idx_last->off + pw_idx_last->cnt) - pws_idx_blk->off; - return -1; - } + rc = copy_pws_comp (hashcat_ctx, device_param, off_blk, copy_cnt, pws_comp_blk); - for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + if (rc == -1) break; + + if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION)) { - for (int i = 0; i < 64; i++) + while (pw_idx <= pw_idx_last) { - plain_buf[i] = pw.i[i]; + u32 *pw = pws_comp_blk + (pw_idx->off - off_blk); + u32 pw_len = pw_idx->len; + + pw_idx++; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 off = device_param->innerloop_pos + il_pos; + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + for (int i = 0; i < 8; i++) + { + plain_buf[i] = pw[i]; + } + + plain_len = apply_rules_optimized (straight_ctx->kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], pw_len); + } + else + { + for (int i = 0; i < 64; i++) + { + plain_buf[i] = pw[i]; + } + + plain_len = apply_rules (straight_ctx->kernel_rules_buf[off].cmds, plain_buf, pw_len); + } + + if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max; + + out_push (&out, plain_ptr, plain_len); + } } + } + else if (user_options->attack_mode == ATTACK_MODE_COMBI) + { + while (pw_idx <= pw_idx_last) + { + u32 *pw = pws_comp_blk + (pw_idx->off - off_blk); + u32 pw_len = pw_idx->len; - plain_len = pw.pw_len; + pw_idx++; - u64 off = device_param->kernel_params_mp_buf64[3] + il_pos; + for (u32 
il_pos = 0; il_pos < il_cnt; il_pos++) + { + for (int i = 0; i < 64; i++) + { + plain_buf[i] = pw[i]; + } - u32 start = 0; - u32 stop = device_param->kernel_params_mp_buf32[4]; + plain_len = pw_len; - sp_exec (off, (char *) plain_ptr + plain_len, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop); + char *comb_buf = (char *) device_param->combs_buf[il_pos].i; + u32 comb_len = device_param->combs_buf[il_pos].pw_len; - plain_len += start + stop; + if (combinator_ctx->combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + memcpy (plain_ptr + plain_len, comb_buf, comb_len); + } + else + { + memmove (plain_ptr + comb_len, plain_ptr, plain_len); - out_push (&out, plain_ptr, plain_len); - } - } - } - else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) - { - pw_t pw; + memcpy (plain_ptr, comb_buf, comb_len); + } - for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++) - { - const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw); + plain_len += comb_len; - if (rc == -1) - { - if (filename) hc_fclose (&out.fp); + if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max; - return -1; + out_push (&out, plain_ptr, plain_len); + } + } } - - for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) { - u64 off = device_param->kernel_params_mp_buf64[3] + gidvid; + while (pw_idx <= pw_idx_last) + { + u32 *pw = pws_comp_blk + (pw_idx->off - off_blk); + u32 pw_len = pw_idx->len; - u32 start = 0; - u32 stop = device_param->kernel_params_mp_buf32[4]; + pw_idx++; - sp_exec (off, (char *) plain_ptr, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop); + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + for (int i = 0; i < 64; i++) + { + plain_buf[i] = pw[i]; + } - plain_len = stop; + plain_len = pw_len; - char *comb_buf = (char *) device_param->combs_buf[il_pos].i; - u32 comb_len = device_param->combs_buf[il_pos].pw_len; + u64 off = device_param->kernel_params_mp_buf64[3] + 
il_pos; - memcpy (plain_ptr + plain_len, comb_buf, comb_len); + u32 start = 0; + u32 stop = device_param->kernel_params_mp_buf32[4]; - plain_len += comb_len; + sp_exec (off, (char *) plain_ptr + plain_len, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop); - if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max; + plain_len += start + stop; - out_push (&out, plain_ptr, plain_len); + out_push (&out, plain_ptr, plain_len); + } + } } + + gidvid_blk += blk_cnt; // prepare for next block } } @@ -304,5 +316,5 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hc_fclose (&out.fp); } - return 0; + return rc; }