1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-12-22 14:48:12 +00:00

stdout mode: transfer indexes and compressed pw buffer from device in blocks, reducing overhead

This commit is contained in:
Will Crozier 2022-02-08 06:50:51 +00:00
parent 4ed01c2299
commit edf7365cda
3 changed files with 234 additions and 139 deletions

View File

@ -50,6 +50,9 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_
int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);
int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u64 gidd, const u64 cnt, pw_idx_t *dest);
int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 off, u32 cnt, u32 *dest);
int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);
int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);

View File

@ -860,6 +860,86 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
return 0;
}
int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u64 gidd, const u64 cnt, pw_idx_t *dest)
{
if (device_param->is_cuda == true)
{
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1;
if (hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->cuda_stream) == -1) return -1;
if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
if (hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->hip_stream) == -1) return -1;
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
}
#if defined (__APPLE__)
if (device_param->is_metal == true)
{
if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, dest, device_param->metal_d_pws_idx, gidd * sizeof (pw_idx_t), (cnt * sizeof (pw_idx_t))) == -1) return -1;
}
#endif
if (device_param->is_opencl == true)
{
/* blocking */
if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), (cnt * sizeof (pw_idx_t)), dest, 0, NULL, NULL) == -1) return -1;
}
return 0;
}
int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 off, u32 cnt, u32 *dest)
{
if (device_param->is_cuda == true)
{
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1;
if (hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->cuda_stream) == -1) return -1;
if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
if (hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->hip_stream) == -1) return -1;
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
}
#if defined (__APPLE__)
if (device_param->is_metal == true)
{
if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, dest, device_param->metal_d_pws_comp_buf, off * sizeof (u32), cnt * sizeof (u32)) == -1) return -1;
}
#endif
if (device_param->is_opencl == true)
{
/* blocking */
if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), dest, 0, NULL, NULL) == -1) return -1;
}
return 0;
}
int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos)
{
hashconfig_t *hashconfig = hashcat_ctx->hashconfig;

View File

@ -104,97 +104,9 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
const u32 il_cnt = device_param->kernel_param.il_cnt; // ugly, i know
if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
{
pw_t pw;
int rc = 0;
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
if (rc == -1)
{
if (filename) hc_fclose (&out.fp);
return -1;
}
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
const u32 off = device_param->innerloop_pos + il_pos;
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
{
for (int i = 0; i < 8; i++)
{
plain_buf[i] = pw.i[i];
}
plain_len = apply_rules_optimized (straight_ctx->kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], pw.pw_len);
}
else
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw.i[i];
}
plain_len = apply_rules (straight_ctx->kernel_rules_buf[off].cmds, plain_buf, pw.pw_len);
}
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_COMBI)
{
pw_t pw;
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
if (rc == -1)
{
if (filename) hc_fclose (&out.fp);
return -1;
}
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw.i[i];
}
plain_len = pw.pw_len;
char *comb_buf = (char *) device_param->combs_buf[il_pos].i;
u32 comb_len = device_param->combs_buf[il_pos].pw_len;
if (combinator_ctx->combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
memcpy (plain_ptr + plain_len, comb_buf, comb_len);
}
else
{
memmove (plain_ptr + comb_len, plain_ptr, plain_len);
memcpy (plain_ptr, comb_buf, comb_len);
}
plain_len += comb_len;
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_BF)
if (user_options->attack_mode == ATTACK_MODE_BF)
{
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
@ -218,58 +130,10 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
pw_t pw;
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
if (rc == -1)
{
if (filename) hc_fclose (&out.fp);
return -1;
}
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw.i[i];
}
plain_len = pw.pw_len;
u64 off = device_param->kernel_params_mp_buf64[3] + il_pos;
u32 start = 0;
u32 stop = device_param->kernel_params_mp_buf32[4];
sp_exec (off, (char *) plain_ptr + plain_len, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop);
plain_len += start + stop;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
{
pw_t pw;
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
if (rc == -1)
{
if (filename) hc_fclose (&out.fp);
return -1;
}
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
u64 off = device_param->kernel_params_mp_buf64[3] + gidvid;
@ -294,6 +158,154 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
}
}
else
{
// modes below require transferring pw index/buffer data from device to host
const u64 blk_cnt_max = device_param->size_pws_idx / (sizeof (pw_idx_t));
pw_idx_t *const pws_idx_blk = device_param->pws_idx;
u32 *const pws_comp_blk = device_param->pws_comp;
u64 gidvid_blk = 0; // gidvid of first password in current block
while (gidvid_blk < pws_cnt)
{
// copy the pw indexes from device for this block
u64 remain = pws_cnt - gidvid_blk;
u64 blk_cnt = MIN (remain, blk_cnt_max);
rc = copy_pws_idx (hashcat_ctx, device_param, gidvid_blk, blk_cnt, pws_idx_blk);
if (rc == -1) break;
const u32 off_blk = (blk_cnt > 0) ? pws_idx_blk[0].off : 0;
const pw_idx_t *pw_idx = device_param->pws_idx;
const pw_idx_t *pw_idx_last = pw_idx + (blk_cnt - 1);
// copy the pw buffer data from device for this block
u32 copy_cnt = (pw_idx_last->off + pw_idx_last->cnt) - pws_idx_blk->off;
rc = copy_pws_comp (hashcat_ctx, device_param, off_blk, copy_cnt, pws_comp_blk);
if (rc == -1) break;
if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
{
while (pw_idx <= pw_idx_last)
{
u32 *pw = pws_comp_blk + (pw_idx->off - off_blk);
u32 pw_len = pw_idx->len;
pw_idx++;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
const u32 off = device_param->innerloop_pos + il_pos;
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
{
for (int i = 0; i < 8; i++)
{
plain_buf[i] = pw[i];
}
plain_len = apply_rules_optimized (straight_ctx->kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], pw_len);
}
else
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw[i];
}
plain_len = apply_rules (straight_ctx->kernel_rules_buf[off].cmds, plain_buf, pw_len);
}
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_COMBI)
{
while (pw_idx <= pw_idx_last)
{
u32 *pw = pws_comp_blk + (pw_idx->off - off_blk);
u32 pw_len = pw_idx->len;
pw_idx++;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw[i];
}
plain_len = pw_len;
char *comb_buf = (char *) device_param->combs_buf[il_pos].i;
u32 comb_len = device_param->combs_buf[il_pos].pw_len;
if (combinator_ctx->combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
memcpy (plain_ptr + plain_len, comb_buf, comb_len);
}
else
{
memmove (plain_ptr + comb_len, plain_ptr, plain_len);
memcpy (plain_ptr, comb_buf, comb_len);
}
plain_len += comb_len;
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
while (pw_idx <= pw_idx_last)
{
u32 *pw = pws_comp_blk + (pw_idx->off - off_blk);
u32 pw_len = pw_idx->len;
pw_idx++;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw[i];
}
plain_len = pw_len;
u64 off = device_param->kernel_params_mp_buf64[3] + il_pos;
u32 start = 0;
u32 stop = device_param->kernel_params_mp_buf32[4];
sp_exec (off, (char *) plain_ptr + plain_len, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop);
plain_len += start + stop;
out_push (&out, plain_ptr, plain_len);
}
}
}
gidvid_blk += blk_cnt; // prepare for next block
}
}
out_flush (&out);
@ -304,5 +316,5 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
hc_fclose (&out.fp);
}
return 0;
return rc;
}