Merge pull request #3163 from willcrozi/stdout-perf

stdout mode: batched password index/buffer transfers from device
pull/3214/head
Jens Steube 2 years ago committed by GitHub
commit 43a27117a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -50,6 +50,9 @@ void generate_cached_kernel_amp_filename (const u32 attack_kern, char *cache_
int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw);
int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u64 gidd, const u64 cnt, pw_idx_t *dest);
int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 off, u32 cnt, u32 *dest);
int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos);
int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);

@ -860,6 +860,86 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
return 0;
}
/**
 * Copy password index entries from the device's pws_idx buffer to the host.
 *
 * hashcat_ctx:  context handed through to every backend wrapper call
 * device_param: device to read from; its is_cuda / is_hip / is_metal /
 *               is_opencl flags select which backend buffer is read
 * gidd:         index of the first pw_idx_t entry to copy
 * cnt:          number of pw_idx_t entries to copy
 * dest:         host array receiving the entries; the caller must provide
 *               room for at least cnt entries
 *
 * On CUDA and HIP the stream is synchronized before returning and on OpenCL
 * the read is enqueued as blocking, so dest is filled once the call returns.
 * NOTE(review): the Metal path relies on hc_mtlMemcpyDtoH to complete the
 * copy before it returns - confirm against that helper.
 *
 * Returns 0 on success (including cnt == 0), -1 if a backend call failed.
 */
int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u64 gidd, const u64 cnt, pw_idx_t *dest)
{
  // An empty request is a no-op. It must not reach the backends: OpenCL
  // versions before 2.1 reject a zero-size read with CL_INVALID_VALUE, which
  // would turn "nothing to copy" into a failure.
  if (cnt == 0) return 0;

  if (device_param->is_cuda == true)
  {
    if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1;

    int rc_copy = hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->cuda_stream);

    if (rc_copy != -1)
    {
      rc_copy = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
    }

    // Pop even when the transfer failed, so the context pushed above does
    // not stay behind on this thread's context stack.
    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;

    if (rc_copy == -1) return -1;
  }

  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;

    int rc_copy = hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->hip_stream);

    if (rc_copy != -1)
    {
      rc_copy = hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream);
    }

    // Same as the CUDA path: always undo the push before reporting the result.
    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;

    if (rc_copy == -1) return -1;
  }

  #if defined (__APPLE__)
  if (device_param->is_metal == true)
  {
    if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, dest, device_param->metal_d_pws_idx, gidd * sizeof (pw_idx_t), (cnt * sizeof (pw_idx_t))) == -1) return -1;
  }
  #endif

  if (device_param->is_opencl == true)
  {
    /* blocking */

    if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), (cnt * sizeof (pw_idx_t)), dest, 0, NULL, NULL) == -1) return -1;
  }

  return 0;
}
/**
 * Copy a run of u32 words from the device's pws_comp buffer to the host.
 *
 * hashcat_ctx:  context handed through to every backend wrapper call
 * device_param: device to read from; its is_cuda / is_hip / is_metal /
 *               is_opencl flags select which backend buffer is read
 * off:          offset of the first word to copy, counted in u32 units
 * cnt:          number of u32 words to copy
 * dest:         host array receiving the words; the caller must provide
 *               room for at least cnt words
 *
 * On CUDA and HIP the stream is synchronized before returning and on OpenCL
 * the read is enqueued as blocking, so dest is filled once the call returns.
 * NOTE(review): the Metal path relies on hc_mtlMemcpyDtoH to complete the
 * copy before it returns - confirm against that helper.
 *
 * Returns 0 on success (including cnt == 0), -1 if a backend call failed.
 */
int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 off, u32 cnt, u32 *dest)
{
  // Callers derive cnt from index data and may legitimately arrive at zero.
  // Treat that as a no-op instead of forwarding it: OpenCL versions before
  // 2.1 reject a zero-size read with CL_INVALID_VALUE.
  if (cnt == 0) return 0;

  if (device_param->is_cuda == true)
  {
    if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1;

    int rc_copy = hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->cuda_stream);

    if (rc_copy != -1)
    {
      rc_copy = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
    }

    // Pop even when the transfer failed, so the context pushed above does
    // not stay behind on this thread's context stack.
    if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;

    if (rc_copy == -1) return -1;
  }

  if (device_param->is_hip == true)
  {
    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;

    int rc_copy = hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->hip_stream);

    if (rc_copy != -1)
    {
      rc_copy = hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream);
    }

    // Same as the CUDA path: always undo the push before reporting the result.
    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;

    if (rc_copy == -1) return -1;
  }

  #if defined (__APPLE__)
  if (device_param->is_metal == true)
  {
    if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, dest, device_param->metal_d_pws_comp_buf, off * sizeof (u32), cnt * sizeof (u32)) == -1) return -1;
  }
  #endif

  if (device_param->is_opencl == true)
  {
    /* blocking */

    if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, off * sizeof (u32), cnt * sizeof (u32), dest, 0, NULL, NULL) == -1) return -1;
  }

  return 0;
}
int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 highest_pw_len, const u64 pws_pos, const u64 pws_cnt, const u32 fast_iteration, const u32 salt_pos)
{
hashconfig_t *hashconfig = hashcat_ctx->hashconfig;

@ -104,87 +104,51 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
const u32 il_cnt = device_param->kernel_param.il_cnt; // ugly, i know
if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
{
pw_t pw;
int rc = 0;
if (user_options->attack_mode == ATTACK_MODE_BF)
{
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
if (rc == -1)
{
if (filename) hc_fclose (&out.fp);
return -1;
}
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
const u32 off = device_param->innerloop_pos + il_pos;
u64 l_off = device_param->kernel_params_mp_l_buf64[3] + gidvid;
u64 r_off = device_param->kernel_params_mp_r_buf64[3] + il_pos;
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
{
for (int i = 0; i < 8; i++)
{
plain_buf[i] = pw.i[i];
}
u32 l_start = device_param->kernel_params_mp_l_buf32[5];
u32 r_start = device_param->kernel_params_mp_r_buf32[5];
plain_len = apply_rules_optimized (straight_ctx->kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], pw.pw_len);
}
else
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw.i[i];
}
u32 l_stop = device_param->kernel_params_mp_l_buf32[4];
u32 r_stop = device_param->kernel_params_mp_r_buf32[4];
plain_len = apply_rules (straight_ctx->kernel_rules_buf[off].cmds, plain_buf, pw.pw_len);
}
sp_exec (l_off, (char *) plain_ptr + l_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, l_start, l_start + l_stop);
sp_exec (r_off, (char *) plain_ptr + r_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, r_start, r_start + r_stop);
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
plain_len = mask_ctx->css_cnt;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_COMBI)
else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
{
pw_t pw;
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
if (rc == -1)
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
if (filename) hc_fclose (&out.fp);
u64 off = device_param->kernel_params_mp_buf64[3] + gidvid;
return -1;
}
u32 start = 0;
u32 stop = device_param->kernel_params_mp_buf32[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw.i[i];
}
sp_exec (off, (char *) plain_ptr, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop);
plain_len = pw.pw_len;
plain_len = stop;
char *comb_buf = (char *) device_param->combs_buf[il_pos].i;
u32 comb_len = device_param->combs_buf[il_pos].pw_len;
if (combinator_ctx->combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
memcpy (plain_ptr + plain_len, comb_buf, comb_len);
}
else
{
memmove (plain_ptr + comb_len, plain_ptr, plain_len);
memcpy (plain_ptr, comb_buf, comb_len);
}
memcpy (plain_ptr + plain_len, comb_buf, comb_len);
plain_len += comb_len;
@ -194,104 +158,152 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_BF)
else
{
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
// modes below require transferring pw index/buffer data from device to host
const u64 blk_cnt_max = device_param->size_pws_idx / (sizeof (pw_idx_t));
pw_idx_t *const pws_idx_blk = device_param->pws_idx;
u32 *const pws_comp_blk = device_param->pws_comp;
u64 gidvid_blk = 0; // gidvid of first password in current block
while (gidvid_blk < pws_cnt)
{
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
u64 l_off = device_param->kernel_params_mp_l_buf64[3] + gidvid;
u64 r_off = device_param->kernel_params_mp_r_buf64[3] + il_pos;
// copy the pw indexes from device for this block
u32 l_start = device_param->kernel_params_mp_l_buf32[5];
u32 r_start = device_param->kernel_params_mp_r_buf32[5];
u64 remain = pws_cnt - gidvid_blk;
u64 blk_cnt = MIN (remain, blk_cnt_max);
u32 l_stop = device_param->kernel_params_mp_l_buf32[4];
u32 r_stop = device_param->kernel_params_mp_r_buf32[4];
rc = copy_pws_idx (hashcat_ctx, device_param, gidvid_blk, blk_cnt, pws_idx_blk);
sp_exec (l_off, (char *) plain_ptr + l_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, l_start, l_start + l_stop);
sp_exec (r_off, (char *) plain_ptr + r_start, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, r_start, r_start + r_stop);
if (rc == -1) break;
plain_len = mask_ctx->css_cnt;
const u32 off_blk = (blk_cnt > 0) ? pws_idx_blk[0].off : 0;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
pw_t pw;
const pw_idx_t *pw_idx = device_param->pws_idx;
const pw_idx_t *pw_idx_last = pw_idx + (blk_cnt - 1);
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
// copy the pw buffer data from device for this block
if (rc == -1)
{
if (filename) hc_fclose (&out.fp);
u32 copy_cnt = (pw_idx_last->off + pw_idx_last->cnt) - pws_idx_blk->off;
return -1;
}
rc = copy_pws_comp (hashcat_ctx, device_param, off_blk, copy_cnt, pws_comp_blk);
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
if (rc == -1) break;
if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
{
for (int i = 0; i < 64; i++)
while (pw_idx <= pw_idx_last)
{
plain_buf[i] = pw.i[i];
u32 *pw = pws_comp_blk + (pw_idx->off - off_blk);
u32 pw_len = pw_idx->len;
pw_idx++;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
const u32 off = device_param->innerloop_pos + il_pos;
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
{
for (int i = 0; i < 8; i++)
{
plain_buf[i] = pw[i];
}
plain_len = apply_rules_optimized (straight_ctx->kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], pw_len);
}
else
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw[i];
}
plain_len = apply_rules (straight_ctx->kernel_rules_buf[off].cmds, plain_buf, pw_len);
}
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_COMBI)
{
while (pw_idx <= pw_idx_last)
{
u32 *pw = pws_comp_blk + (pw_idx->off - off_blk);
u32 pw_len = pw_idx->len;
plain_len = pw.pw_len;
pw_idx++;
u64 off = device_param->kernel_params_mp_buf64[3] + il_pos;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw[i];
}
u32 start = 0;
u32 stop = device_param->kernel_params_mp_buf32[4];
plain_len = pw_len;
sp_exec (off, (char *) plain_ptr + plain_len, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop);
char *comb_buf = (char *) device_param->combs_buf[il_pos].i;
u32 comb_len = device_param->combs_buf[il_pos].pw_len;
plain_len += start + stop;
if (combinator_ctx->combs_mode == COMBINATOR_MODE_BASE_LEFT)
{
memcpy (plain_ptr + plain_len, comb_buf, comb_len);
}
else
{
memmove (plain_ptr + comb_len, plain_ptr, plain_len);
out_push (&out, plain_ptr, plain_len);
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID2)
{
pw_t pw;
memcpy (plain_ptr, comb_buf, comb_len);
}
for (u64 gidvid = 0; gidvid < pws_cnt; gidvid++)
{
const int rc = gidd_to_pw_t (hashcat_ctx, device_param, gidvid, &pw);
plain_len += comb_len;
if (rc == -1)
{
if (filename) hc_fclose (&out.fp);
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
return -1;
out_push (&out, plain_ptr, plain_len);
}
}
}
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
u64 off = device_param->kernel_params_mp_buf64[3] + gidvid;
while (pw_idx <= pw_idx_last)
{
u32 *pw = pws_comp_blk + (pw_idx->off - off_blk);
u32 pw_len = pw_idx->len;
u32 start = 0;
u32 stop = device_param->kernel_params_mp_buf32[4];
pw_idx++;
sp_exec (off, (char *) plain_ptr, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop);
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
for (int i = 0; i < 64; i++)
{
plain_buf[i] = pw[i];
}
plain_len = stop;
plain_len = pw_len;
char *comb_buf = (char *) device_param->combs_buf[il_pos].i;
u32 comb_len = device_param->combs_buf[il_pos].pw_len;
u64 off = device_param->kernel_params_mp_buf64[3] + il_pos;
memcpy (plain_ptr + plain_len, comb_buf, comb_len);
u32 start = 0;
u32 stop = device_param->kernel_params_mp_buf32[4];
plain_len += comb_len;
sp_exec (off, (char *) plain_ptr + plain_len, mask_ctx->root_css_buf, mask_ctx->markov_css_buf, start, start + stop);
if (plain_len > hashconfig->pw_max) plain_len = hashconfig->pw_max;
plain_len += start + stop;
out_push (&out, plain_ptr, plain_len);
out_push (&out, plain_ptr, plain_len);
}
}
}
gidvid_blk += blk_cnt; // prepare for next block
}
}
@ -304,5 +316,5 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
hc_fclose (&out.fp);
}
return 0;
return rc;
}

Loading…
Cancel
Save