1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-22 16:18:09 +00:00

OpenCL Kernels: Add a decompressing kernel and a compressing host code in order to reduce PCIe transfer time

For details see https://hashcat.net/forum/thread-7267.html
This commit is contained in:
jsteube 2018-02-05 17:18:58 +01:00
parent 2f30e5e929
commit a3a16f676f
12 changed files with 354 additions and 97 deletions

View File

@ -17,9 +17,9 @@ __kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __constant const
if (rules_buf[0].cmds[0] == RULE_OP_MANGLE_NOOP && rules_buf[0].cmds[1] == 0) return;
pw_t pw = pws[gid];
pw_t pw = pws_amp[gid];
pw.pw_len = apply_rules (rules_buf[0].cmds, pw.i, pw.pw_len);
pws_amp[gid] = pw;
pws[gid] = pw;
}

View File

@ -13,7 +13,7 @@ __kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __global const ke
if (gid >= gid_max) return;
pw_t pw = pws[gid];
pw_t pw = pws_amp[gid];
pw_t comb = combs_buf[0];
@ -39,5 +39,5 @@ __kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __global const ke
pw.pw_len = pw_len + comb_len;
pws_amp[gid] = pw;
pws[gid] = pw;
}

View File

@ -15,5 +15,5 @@ __kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __global const ke
const u32 w0r = bfs_buf[0].i;
pws_amp[gid].i[0] |= w0r;
pws[gid].i[0] |= w0r;
}

View File

@ -61606,6 +61606,39 @@ void append_0x80_4x4_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u
#endif
}
static void gpu_decompress_entry (__global pw_idx_t *pws_idx, __global u32 *pws_comp, pw_t *pw, const u64 gid)
{
const u32 off = pws_idx[gid].off;
const u32 cnt = pws_idx[gid].cnt;
const u32 len = pws_idx[gid].len;
#pragma unroll
for (u32 i = 0; i < 64; i++)
{
pw->i[i] = 0;
}
for (u32 i = 0, j = off; i < cnt; i++, j++)
{
pw->i[i] = pws_comp[j];
}
pw->pw_len = len;
}
__kernel void gpu_decompress (__global pw_idx_t *pws_idx, __global u32 *pws_comp, __global pw_t *pws_buf, const u64 gid_max)
{
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
pw_t pw;
gpu_decompress_entry (pws_idx, pws_comp, &pw, gid);
pws_buf[gid] = pw;
}
__kernel void gpu_memset (__global uint4 *buf, const u32 value, const u64 gid_max)
{
const u64 gid = get_global_id (0);

View File

@ -1965,6 +1965,14 @@ typedef struct pw
} pw_t;
typedef struct pw_idx
{
u32 off;
u32 cnt;
u32 len;
} pw_idx_t;
typedef struct bf
{
u32 i;

View File

@ -37,6 +37,7 @@
## Improvements
##
- OpenCL Kernels: Add a decompressing kernel and a compressing host code in order to reduce PCIe transfer time
- OpenCL Kernels: Remove password length restriction to 16 for Cisco-PIX and Cisco-ASA hashes
- Terminal: Display Set Cost/Rounds During Benchmarking
- Terminal: Show [r]esume in prompt only in pause mode, and show [p]ause in prompt only in resume mode

View File

@ -860,6 +860,14 @@ typedef struct pw
} pw_t;
typedef struct pw_idx
{
u32 off;
u32 cnt;
u32 len;
} pw_idx_t;
typedef struct bf
{
u32 i;
@ -935,6 +943,7 @@ typedef struct hc_device_param
u32 kernel_threads_by_wgs_kernel_tm;
u32 kernel_threads_by_wgs_kernel_memset;
u32 kernel_threads_by_wgs_kernel_atinit;
u32 kernel_threads_by_wgs_kernel_decompress;
u32 kernel_loops;
u32 kernel_accel;
@ -951,6 +960,8 @@ typedef struct hc_device_param
size_t size_pws;
size_t size_pws_amp;
size_t size_pws_comp;
size_t size_pws_idx;
size_t size_tmps;
size_t size_hooks;
size_t size_bfs;
@ -973,8 +984,9 @@ typedef struct hc_device_param
void *hooks_buf;
pw_t *pws_buf;
u32 pws_cnt;
pw_idx_t *pws_idx;
u32 *pws_comp;
u32 pws_cnt;
u64 words_off;
u64 words_done;
@ -1038,6 +1050,7 @@ typedef struct hc_device_param
cl_kernel kernel_tm;
cl_kernel kernel_memset;
cl_kernel kernel_atinit;
cl_kernel kernel_decompress;
cl_context context;
@ -1049,6 +1062,8 @@ typedef struct hc_device_param
cl_mem d_pws_buf;
cl_mem d_pws_amp_buf;
cl_mem d_pws_comp_buf;
cl_mem d_pws_idx;
cl_mem d_words_buf_l;
cl_mem d_words_buf_r;
cl_mem d_rules;
@ -1092,6 +1107,7 @@ typedef struct hc_device_param
void *kernel_params_tm[PARAMCNT];
void *kernel_params_memset[PARAMCNT];
void *kernel_params_atinit[PARAMCNT];
void *kernel_params_decompress[PARAMCNT];
u32 kernel_params_buf32[PARAMCNT];
u64 kernel_params_buf64[PARAMCNT];
@ -1114,6 +1130,9 @@ typedef struct hc_device_param
u32 kernel_params_atinit_buf32[PARAMCNT];
u64 kernel_params_atinit_buf64[PARAMCNT];
u32 kernel_params_decompress_buf32[PARAMCNT];
u64 kernel_params_decompress_buf64[PARAMCNT];
} hc_device_param_t;
typedef struct opencl_ctx

View File

@ -116,16 +116,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (CL_rc == -1) return -1;
}
}
else
{
CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, device_param->d_pws_amp_buf, 0, 0, kernel_power_max * sizeof (pw_t), 0, NULL, NULL);
if (CL_rc == -1) return -1;
CL_rc = run_kernel_amp (hashcat_ctx, device_param, kernel_power_max);
if (CL_rc == -1) return -1;
}
// Do a pre-autotune test run to find out if kernel runtime is above some TDR limit
@ -236,13 +226,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (CL_rc == -1) return -1;
if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
{
CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_pws_amp_buf, 0, device_param->size_pws_amp);
if (CL_rc == -1) return -1;
}
// reset other buffers in case autotune cracked something
CL_rc = run_kernel_memset (hashcat_ctx, device_param, device_param->d_plain_bufs, 0, device_param->size_plains);

View File

@ -158,7 +158,8 @@ static int calc_stdin (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
u32 words_extra_total = 0;
memset (device_param->pws_buf, 0, device_param->size_pws);
memset (device_param->pws_comp, 0, device_param->size_pws_comp);
memset (device_param->pws_idx, 0, device_param->size_pws_idx);
while (device_param->pws_cnt < device_param->kernel_power)
{
@ -490,7 +491,8 @@ static int calc (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
u32 words_extra_total = 0;
memset (device_param->pws_buf, 0, device_param->size_pws);
memset (device_param->pws_comp, 0, device_param->size_pws_comp);
memset (device_param->pws_idx, 0, device_param->size_pws_idx);
while (words_extra)
{

View File

@ -1146,10 +1146,29 @@ int hc_clReleaseEvent (hashcat_ctx_t *hashcat_ctx, cl_event event)
int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 gidd, pw_t *pw)
{
int CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, gidd * sizeof (pw_t), sizeof (pw_t), pw, 0, NULL, NULL);
pw_idx_t pw_idx;
int CL_rc;
CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL);
if (CL_rc == -1) return -1;
const u32 off = pw_idx.off;
const u32 cnt = pw_idx.cnt;
const u32 len = pw_idx.len;
CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, off * sizeof (u32), len * sizeof (u32), pw->i, 0, NULL, NULL);
if (CL_rc == -1) return -1;
for (u32 i = cnt; i < 64; i++)
{
pw->i[i] = 0;
}
pw->pw_len = len;
return 0;
}
@ -1225,7 +1244,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
if (run_init == true)
{
CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, device_param->d_pws_amp_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_amp_buf, device_param->d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL);
if (CL_rc == -1) return -1;
@ -1841,11 +1860,99 @@ int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
return 0;
}
int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num)
{
u64 num_elements = num;
device_param->kernel_params_decompress_buf64[3] = num_elements;
const u64 kernel_threads = device_param->kernel_threads_by_wgs_kernel_decompress;
while (num_elements % kernel_threads) num_elements++;
cl_kernel kernel = device_param->kernel_decompress;
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
int CL_rc;
CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]);
if (CL_rc == -1) return -1;
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
if (CL_rc == -1) return -1;
CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
if (CL_rc == -1) return -1;
CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
if (CL_rc == -1) return -1;
return 0;
}
int run_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size)
{
return run_kernel_memset (hashcat_ctx, device_param, buf, 0, size);
}
void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u32 pws_cnt, const u8 chr)
{
// this function is used if we have to modify the compressed pws buffer in order to
// append some data to each password candidate
u32 *tmp_pws_comp = (u32 *) hcmalloc (device_param->size_pws_comp);
pw_idx_t *tmp_pws_idx = (pw_idx_t *) hcmalloc (device_param->size_pws_idx);
for (u32 i = 0; i < pws_cnt; i++)
{
pw_idx_t *pw_idx_src = device_param->pws_idx + i;
pw_idx_t *pw_idx_dst = tmp_pws_idx + i;
const u32 src_off = pw_idx_src->off;
const u32 src_cnt = pw_idx_src->cnt;
const u32 src_len = pw_idx_src->len;
u8 buf[256];
memcpy (buf, device_param->pws_comp + src_off, src_len);
buf[src_len] = chr;
const u32 dst_len = src_len + 1;
const u32 dst_pw_len4 = (dst_len + 3) & ~3; // round up to multiple of 4
const u32 dst_pw_len4_cnt = dst_pw_len4 / 4;
pw_idx_dst->cnt = dst_pw_len4_cnt;
pw_idx_dst->len = src_len; // this is intenionally! src_len can not be dst_len, we dont want the kernel to think 0x80 is part of the password
u8 *dst = (u8 *) (tmp_pws_comp + pw_idx_dst->off);
memcpy (dst, buf, dst_len);
memset (dst + dst_len, 0, dst_pw_len4 - dst_len);
// prepare next element
pw_idx_t *pw_idx_dst_next = pw_idx_dst + 1;
pw_idx_dst_next->off = pw_idx_dst->off + pw_idx_dst->cnt;
}
memcpy (device_param->pws_comp, tmp_pws_comp, device_param->size_pws_comp);
memcpy (device_param->pws_idx, tmp_pws_idx, device_param->size_pws_idx);
hcfree (tmp_pws_comp);
hcfree (tmp_pws_idx);
}
int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 pws_cnt)
{
combinator_ctx_t *combinator_ctx = hashcat_ctx->combinator_ctx;
@ -1855,7 +1962,21 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
{
const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
int CL_rc;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
if (CL_rc == -1) return -1;
const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
const u32 off = pw_idx->off;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
if (CL_rc == -1) return -1;
CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
if (CL_rc == -1) return -1;
}
@ -1869,25 +1990,11 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
{
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
{
for (u32 i = 0; i < pws_cnt; i++)
{
const u32 pw_len = device_param->pws_buf[i].pw_len;
u8 *ptr = (u8 *) device_param->pws_buf[i].i;
ptr[pw_len] = 0x01;
}
rebuild_pws_compressed_append (device_param, pws_cnt, 0x01);
}
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
{
for (u32 i = 0; i < pws_cnt; i++)
{
const u32 pw_len = device_param->pws_buf[i].pw_len;
u8 *ptr = (u8 *) device_param->pws_buf[i].i;
ptr[pw_len] = 0x80;
}
rebuild_pws_compressed_append (device_param, pws_cnt, 0x80);
}
}
}
@ -1895,29 +2002,29 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
{
if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
{
for (u32 i = 0; i < pws_cnt; i++)
{
const u32 pw_len = device_param->pws_buf[i].pw_len;
u8 *ptr = (u8 *) device_param->pws_buf[i].i;
ptr[pw_len] = 0x01;
}
rebuild_pws_compressed_append (device_param, pws_cnt, 0x01);
}
else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
{
for (u32 i = 0; i < pws_cnt; i++)
{
const u32 pw_len = device_param->pws_buf[i].pw_len;
u8 *ptr = (u8 *) device_param->pws_buf[i].i;
ptr[pw_len] = 0x80;
}
rebuild_pws_compressed_append (device_param, pws_cnt, 0x80);
}
}
const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
int CL_rc;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
if (CL_rc == -1) return -1;
const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
const u32 off = pw_idx->off;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
if (CL_rc == -1) return -1;
CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
if (CL_rc == -1) return -1;
}
@ -1925,13 +2032,41 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
{
if (user_options->attack_mode == ATTACK_MODE_COMBI)
{
const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
int CL_rc;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
if (CL_rc == -1) return -1;
const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
const u32 off = pw_idx->off;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
if (CL_rc == -1) return -1;
CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
if (CL_rc == -1) return -1;
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
const int CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
int CL_rc;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL);
if (CL_rc == -1) return -1;
const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
const u32 off = pw_idx->off;
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL);
if (CL_rc == -1) return -1;
CL_rc = run_kernel_decompress (hashcat_ctx, device_param, pws_cnt);
if (CL_rc == -1) return -1;
}
@ -4096,6 +4231,14 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
const size_t size_pws_amp = size_pws;
// size_pws_comp
const size_t size_pws_comp = (size_t) kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
const size_t size_pws_idx = (size_t) (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
const size_t size_tmps = (size_t) kernel_power_max * hashconfig->tmp_size;
@ -4122,6 +4265,8 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
+ size_plains
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_results
+ size_root_css
+ size_rules
@ -4205,10 +4350,12 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
// find out if we would request too much memory on memory blocks which are based on kernel_accel
size_t size_pws = 4;
size_t size_pws_amp = 4;
size_t size_tmps = 4;
size_t size_hooks = 4;
size_t size_pws = 4;
size_t size_pws_amp = 4;
size_t size_pws_comp = 4;
size_t size_pws_idx = 4;
size_t size_tmps = 4;
size_t size_hooks = 4;
while (kernel_accel_max >= kernel_accel_min)
{
@ -4220,6 +4367,14 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
size_pws_amp = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) ? 1 : size_pws;
// size_pws_comp
size_pws_comp = (size_t) kernel_power_max * (sizeof (u32) * 64);
// size_pws_idx
size_pws_idx = (size_t) (kernel_power_max + 1) * sizeof (pw_idx_t);
// size_tmps
size_tmps = (size_t) kernel_power_max * hashconfig->tmp_size;
@ -4255,6 +4410,8 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
+ size_plains
+ size_pws
+ size_pws_amp
+ size_pws_comp
+ size_pws_idx
+ size_results
+ size_root_css
+ size_rules
@ -4302,14 +4459,16 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
}
*/
device_param->size_bfs = size_bfs;
device_param->size_combs = size_combs;
device_param->size_rules = size_rules;
device_param->size_rules_c = size_rules_c;
device_param->size_pws = size_pws;
device_param->size_pws_amp = size_pws_amp;
device_param->size_tmps = size_tmps;
device_param->size_hooks = size_hooks;
device_param->size_bfs = size_bfs;
device_param->size_combs = size_combs;
device_param->size_rules = size_rules;
device_param->size_rules_c = size_rules_c;
device_param->size_pws = size_pws;
device_param->size_pws_amp = size_pws_amp;
device_param->size_pws_comp = size_pws_comp;
device_param->size_pws_idx = size_pws_idx;
device_param->size_tmps = size_tmps;
device_param->size_hooks = size_hooks;
/**
* default building options
@ -4940,8 +5099,10 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
* global buffers
*/
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws, NULL, &device_param->d_pws_buf); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws_amp, NULL, &device_param->d_pws_amp_buf); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_pws, NULL, &device_param->d_pws_buf); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_pws_amp, NULL, &device_param->d_pws_amp_buf); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws_comp, NULL, &device_param->d_pws_comp_buf); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, size_pws_idx, NULL, &device_param->d_pws_idx); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_tmps, NULL, &device_param->d_tmps); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_WRITE, size_hooks, NULL, &device_param->d_hooks); if (CL_rc == -1) return -1;
CL_rc = hc_clCreateBuffer (hashcat_ctx, device_param->context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->d_bitmap_s1_a); if (CL_rc == -1) return -1;
@ -5034,9 +5195,13 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
* main host data
*/
pw_t *pws_buf = (pw_t *) hcmalloc (size_pws);
u32 *pws_comp = (u32 *) hcmalloc (size_pws_comp);
device_param->pws_buf = pws_buf;
device_param->pws_comp = pws_comp;
pw_idx_t *pws_idx = (pw_idx_t *) hcmalloc (size_pws_idx);
device_param->pws_idx = pws_idx;
pw_t *combs_buf = (pw_t *) hccalloc (KERNEL_COMBS, sizeof (pw_t));
@ -5062,9 +5227,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_buf32[33] = 0; // combs_mode
device_param->kernel_params_buf64[34] = 0; // gid_max
device_param->kernel_params[ 0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
? &device_param->d_pws_buf
: &device_param->d_pws_amp_buf;
device_param->kernel_params[ 0] = &device_param->d_pws_buf;
device_param->kernel_params[ 1] = &device_param->d_rules_c;
device_param->kernel_params[ 2] = &device_param->d_combs_c;
device_param->kernel_params[ 3] = &device_param->d_bfs_c;
@ -5119,7 +5282,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
}
else
{
device_param->kernel_params_mp[0] = &device_param->d_pws_buf;
device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
? &device_param->d_pws_buf
: &device_param->d_pws_amp_buf;
}
}
@ -5140,7 +5305,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_mp_l_buf32[8] = 0;
device_param->kernel_params_mp_l_buf64[9] = 0;
device_param->kernel_params_mp_l[0] = &device_param->d_pws_buf;
device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
? &device_param->d_pws_buf
: &device_param->d_pws_amp_buf;
device_param->kernel_params_mp_l[1] = &device_param->d_root_css_buf;
device_param->kernel_params_mp_l[2] = &device_param->d_markov_css_buf;
device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3];
@ -5194,6 +5361,15 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_atinit[0] = NULL;
device_param->kernel_params_atinit[1] = &device_param->kernel_params_atinit_buf64[1];
device_param->kernel_params_decompress_buf64[3] = 0; // gid_max
device_param->kernel_params_decompress[0] = &device_param->d_pws_idx;
device_param->kernel_params_decompress[1] = &device_param->d_pws_comp_buf;
device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
? &device_param->d_pws_buf
: &device_param->d_pws_amp_buf;
device_param->kernel_params_decompress[3] = &device_param->kernel_params_decompress_buf64[3];
/**
* kernel name
*/
@ -5451,6 +5627,21 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
// GPU decompress
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, "gpu_decompress", &device_param->kernel_decompress);
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_decompress, &device_param->kernel_threads_by_wgs_kernel_decompress);
if (CL_rc == -1) return -1;
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1;
// MP start
if (user_options->attack_mode == ATTACK_MODE_BF)
@ -5537,7 +5728,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (CL_rc == -1) return -1;
}
for (u32 i = 7; i < 7; i++)
for (u32 i = 6; i < 7; i++)
{
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]);
@ -5549,6 +5740,8 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf, device_param->size_pws); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf, device_param->size_pws_amp); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_comp_buf, device_param->size_pws_comp); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_idx, device_param->size_pws_idx); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps, device_param->size_tmps); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks, device_param->size_hooks); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs, device_param->size_plains); if (CL_rc == -1) return -1;
@ -5666,12 +5859,15 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->skipped == true) continue;
hcfree (device_param->pws_buf);
hcfree (device_param->pws_comp);
hcfree (device_param->pws_idx);
hcfree (device_param->combs_buf);
hcfree (device_param->hooks_buf);
if (device_param->d_pws_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_buf);
if (device_param->d_pws_amp_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_amp_buf);
if (device_param->d_pws_comp_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_comp_buf);
if (device_param->d_pws_idx) hc_clReleaseMemObject (hashcat_ctx, device_param->d_pws_idx);
if (device_param->d_rules) hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules);
if (device_param->d_rules_c) hc_clReleaseMemObject (hashcat_ctx, device_param->d_rules_c);
if (device_param->d_combs) hc_clReleaseMemObject (hashcat_ctx, device_param->d_combs);
@ -5720,6 +5916,7 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->kernel_amp) hc_clReleaseKernel (hashcat_ctx, device_param->kernel_amp);
if (device_param->kernel_memset) hc_clReleaseKernel (hashcat_ctx, device_param->kernel_memset);
if (device_param->kernel_atinit) hc_clReleaseKernel (hashcat_ctx, device_param->kernel_atinit);
if (device_param->kernel_decompress)hc_clReleaseKernel (hashcat_ctx, device_param->kernel_decompress);
if (device_param->program) hc_clReleaseProgram (hashcat_ctx, device_param->program);
if (device_param->program_mp) hc_clReleaseProgram (hashcat_ctx, device_param->program_mp);
@ -5729,12 +5926,15 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->context) hc_clReleaseContext (hashcat_ctx, device_param->context);
device_param->pws_buf = NULL;
device_param->pws_comp = NULL;
device_param->pws_idx = NULL;
device_param->combs_buf = NULL;
device_param->hooks_buf = NULL;
device_param->d_pws_buf = NULL;
device_param->d_pws_amp_buf = NULL;
device_param->d_pws_comp_buf = NULL;
device_param->d_pws_idx = NULL;
device_param->d_rules = NULL;
device_param->d_rules_c = NULL;
device_param->d_combs = NULL;
@ -5782,6 +5982,7 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_amp = NULL;
device_param->kernel_memset = NULL;
device_param->kernel_atinit = NULL;
device_param->kernel_decompress = NULL;
device_param->program = NULL;
device_param->program_mp = NULL;
device_param->program_amp = NULL;
@ -5818,7 +6019,8 @@ void opencl_session_reset (hashcat_ctx_t *hashcat_ctx)
// some more resets:
if (device_param->pws_buf) memset (device_param->pws_buf, 0, device_param->size_pws);
if (device_param->pws_comp) memset (device_param->pws_comp, 0, device_param->size_pws_comp);
if (device_param->pws_idx) memset (device_param->pws_idx, 0, device_param->size_pws_idx);
device_param->pws_cnt = 0;

View File

@ -296,7 +296,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
pw.pw_len = pw_len;
CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);
if (CL_err != CL_SUCCESS) return -1;
}
@ -481,7 +481,6 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
device_param->kernel_params[18] = &device_param->d_esalt_bufs;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf, device_param->size_pws); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_amp_buf, device_param->size_pws_amp); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps, device_param->size_tmps); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks, device_param->size_hooks); if (CL_rc == -1) return -1;
CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs, device_param->size_plains); if (CL_rc == -1) return -1;

View File

@ -242,16 +242,26 @@ void pw_add (hc_device_param_t *device_param, const u8 *pw_buf, const int pw_len
{
if (device_param->pws_cnt < device_param->kernel_power)
{
pw_t *pw = device_param->pws_buf + device_param->pws_cnt;
pw_idx_t *pw_idx = device_param->pws_idx + device_param->pws_cnt;
u8 *ptr = (u8 *) pw->i;
const u32 pw_len4 = (pw_len + 3) & ~3; // round up to multiple of 4
memcpy (ptr, pw_buf, pw_len);
const u32 pw_len4_cnt = pw_len4 / 4;
//memset zero to entire buffer done in outer loop
//memset (ptr + pw_len, 0, sizeof (pw->i) - pw_len);
pw_idx->cnt = pw_len4_cnt;
pw_idx->len = pw_len;
pw->pw_len = pw_len;
u8 *dst = (u8 *) (device_param->pws_comp + pw_idx->off);
memcpy (dst, pw_buf, pw_len);
memset (dst + pw_len, 0, pw_len4 - pw_len);
// prepare next element
pw_idx_t *pw_idx_next = pw_idx + 1;
pw_idx_next->off = pw_idx->off + pw_idx->cnt;
device_param->pws_cnt++;
}