mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-22 16:18:09 +00:00
OpenCL Kernels: Use a special kernel to initialize the password buffer used during autotune measurements to reduce startup time
This commit is contained in:
parent
7a8239b4c1
commit
7062425d2b
@ -61594,3 +61594,84 @@ __kernel void gpu_memset (__global uint4 *buf, const u32 value, const u64 gid_ma
|
||||
|
||||
buf[gid] = (uint4) (value);
|
||||
}
|
||||
|
||||
__kernel void gpu_atinit (__global pw_t *buf, const u64 gid_max)
|
||||
{
|
||||
const u64 gid = get_global_id (0);
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
const u32 l32 = l32_from_64_S (gid);
|
||||
const u32 h32 = h32_from_64_S (gid);
|
||||
|
||||
pw_t pw;
|
||||
|
||||
pw.i[ 0] = 0x5c5c5c5c ^ l32;
|
||||
pw.i[ 1] = 0x36363636 ^ h32;
|
||||
pw.i[ 2] = 0;
|
||||
pw.i[ 3] = 0;
|
||||
pw.i[ 4] = 0;
|
||||
pw.i[ 5] = 0;
|
||||
pw.i[ 6] = 0;
|
||||
pw.i[ 7] = 0;
|
||||
pw.i[ 8] = 0;
|
||||
pw.i[ 9] = 0;
|
||||
pw.i[10] = 0;
|
||||
pw.i[11] = 0;
|
||||
pw.i[12] = 0;
|
||||
pw.i[13] = 0;
|
||||
pw.i[14] = 0;
|
||||
pw.i[15] = 0;
|
||||
pw.i[16] = 0;
|
||||
pw.i[17] = 0;
|
||||
pw.i[18] = 0;
|
||||
pw.i[19] = 0;
|
||||
pw.i[20] = 0;
|
||||
pw.i[21] = 0;
|
||||
pw.i[22] = 0;
|
||||
pw.i[23] = 0;
|
||||
pw.i[24] = 0;
|
||||
pw.i[25] = 0;
|
||||
pw.i[26] = 0;
|
||||
pw.i[27] = 0;
|
||||
pw.i[28] = 0;
|
||||
pw.i[29] = 0;
|
||||
pw.i[30] = 0;
|
||||
pw.i[31] = 0;
|
||||
pw.i[32] = 0;
|
||||
pw.i[33] = 0;
|
||||
pw.i[34] = 0;
|
||||
pw.i[35] = 0;
|
||||
pw.i[36] = 0;
|
||||
pw.i[37] = 0;
|
||||
pw.i[38] = 0;
|
||||
pw.i[39] = 0;
|
||||
pw.i[40] = 0;
|
||||
pw.i[41] = 0;
|
||||
pw.i[42] = 0;
|
||||
pw.i[43] = 0;
|
||||
pw.i[44] = 0;
|
||||
pw.i[45] = 0;
|
||||
pw.i[46] = 0;
|
||||
pw.i[47] = 0;
|
||||
pw.i[48] = 0;
|
||||
pw.i[49] = 0;
|
||||
pw.i[50] = 0;
|
||||
pw.i[51] = 0;
|
||||
pw.i[52] = 0;
|
||||
pw.i[53] = 0;
|
||||
pw.i[54] = 0;
|
||||
pw.i[55] = 0;
|
||||
pw.i[56] = 0;
|
||||
pw.i[57] = 0;
|
||||
pw.i[58] = 0;
|
||||
pw.i[59] = 0;
|
||||
pw.i[60] = 0;
|
||||
pw.i[61] = 0;
|
||||
pw.i[62] = 0;
|
||||
pw.i[63] = 0; // yep that's faster
|
||||
|
||||
pw.pw_len = 1 + (l32 & 15);
|
||||
|
||||
buf[gid] = pw;
|
||||
}
|
||||
|
@ -45,6 +45,7 @@
|
||||
- Files: Copy include/ folder and its content to case SHARED is set to 1 in Makefile
|
||||
- Hash Parser: Changed the way large strings are handled/truncated within the event buffer if they are too large to fit
|
||||
- Hash Parser: Fixed our use of strtok_r () calls
|
||||
- OpenCL Kernels: Use a special kernel to initialize the password buffer used during autotune measurements to reduce startup time
|
||||
- OpenCL Kernels: Replace variables from uXX to uXXa if used in __constant space
|
||||
- OpenCL Kernels: Use static declaraction for uXXa variables used in __constant space
|
||||
- Self Test: Skip self-test for mode 8900 user-configurable scrypt settings are incompatible to fixed settings in the self-test hash
|
||||
|
@ -63,6 +63,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
|
||||
int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
|
||||
int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);
|
||||
int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u64 num);
|
||||
int run_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
|
||||
int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
|
||||
int run_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
|
||||
int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 pws_cnt);
|
||||
|
@ -934,6 +934,7 @@ typedef struct hc_device_param
|
||||
u32 kernel_threads_by_wgs_kernel_amp;
|
||||
u32 kernel_threads_by_wgs_kernel_tm;
|
||||
u32 kernel_threads_by_wgs_kernel_memset;
|
||||
u32 kernel_threads_by_wgs_kernel_atinit;
|
||||
|
||||
u32 kernel_loops;
|
||||
u32 kernel_accel;
|
||||
@ -1034,6 +1035,7 @@ typedef struct hc_device_param
|
||||
cl_kernel kernel_amp;
|
||||
cl_kernel kernel_tm;
|
||||
cl_kernel kernel_memset;
|
||||
cl_kernel kernel_atinit;
|
||||
|
||||
cl_context context;
|
||||
|
||||
@ -1087,6 +1089,7 @@ typedef struct hc_device_param
|
||||
void *kernel_params_amp[PARAMCNT];
|
||||
void *kernel_params_tm[PARAMCNT];
|
||||
void *kernel_params_memset[PARAMCNT];
|
||||
void *kernel_params_atinit[PARAMCNT];
|
||||
|
||||
u32 kernel_params_buf32[PARAMCNT];
|
||||
u64 kernel_params_buf64[PARAMCNT];
|
||||
@ -1106,6 +1109,9 @@ typedef struct hc_device_param
|
||||
u32 kernel_params_memset_buf32[PARAMCNT];
|
||||
u64 kernel_params_memset_buf64[PARAMCNT];
|
||||
|
||||
u32 kernel_params_atinit_buf32[PARAMCNT];
|
||||
u64 kernel_params_atinit_buf64[PARAMCNT];
|
||||
|
||||
} hc_device_param_t;
|
||||
|
||||
typedef struct opencl_ctx
|
||||
|
@ -103,14 +103,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
|
||||
|
||||
int CL_rc;
|
||||
|
||||
for (u32 i = 0; i < kernel_power_max; i++)
|
||||
{
|
||||
device_param->pws_buf[i].i[0] = i;
|
||||
device_param->pws_buf[i].i[1] = 0x01234567;
|
||||
device_param->pws_buf[i].pw_len = 7 + (i & 7);
|
||||
}
|
||||
|
||||
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);
|
||||
CL_rc = run_kernel_atinit (hashcat_ctx, device_param, device_param->d_pws_buf, kernel_power_max);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
|
58
src/opencl.c
58
src/opencl.c
@ -1727,6 +1727,44 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int run_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num)
|
||||
{
|
||||
u64 num_elements = num;
|
||||
|
||||
device_param->kernel_params_atinit_buf64[1] = num_elements;
|
||||
|
||||
const u64 kernel_threads = device_param->kernel_threads_by_wgs_kernel_atinit;
|
||||
|
||||
cl_kernel kernel = device_param->kernel_atinit;
|
||||
|
||||
const size_t global_work_size[3] = { num_elements, 1, 1 };
|
||||
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
|
||||
|
||||
int CL_rc;
|
||||
|
||||
CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
CL_rc = hc_clFlush (hashcat_ctx, device_param->command_queue);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
CL_rc = hc_clFinish (hashcat_ctx, device_param->command_queue);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size)
|
||||
{
|
||||
const u64 num16d = size / 16;
|
||||
@ -5075,6 +5113,11 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
device_param->kernel_params_memset[1] = &device_param->kernel_params_memset_buf32[1];
|
||||
device_param->kernel_params_memset[2] = &device_param->kernel_params_memset_buf64[2];
|
||||
|
||||
device_param->kernel_params_atinit_buf64[1] = 0; // gid_max
|
||||
|
||||
device_param->kernel_params_atinit[0] = NULL;
|
||||
device_param->kernel_params_atinit[1] = &device_param->kernel_params_atinit_buf64[1];
|
||||
|
||||
/**
|
||||
* kernel name
|
||||
*/
|
||||
@ -5319,6 +5362,19 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_memset, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1;
|
||||
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1;
|
||||
|
||||
// GPU autotune init
|
||||
|
||||
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, "gpu_atinit", &device_param->kernel_atinit);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_atinit, &device_param->kernel_threads_by_wgs_kernel_atinit);
|
||||
|
||||
if (CL_rc == -1) return -1;
|
||||
|
||||
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1;
|
||||
CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1;
|
||||
|
||||
// MP start
|
||||
|
||||
if (user_options->attack_mode == ATTACK_MODE_BF)
|
||||
@ -5587,6 +5643,7 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
||||
if (device_param->kernel_tm) hc_clReleaseKernel (hashcat_ctx, device_param->kernel_tm);
|
||||
if (device_param->kernel_amp) hc_clReleaseKernel (hashcat_ctx, device_param->kernel_amp);
|
||||
if (device_param->kernel_memset) hc_clReleaseKernel (hashcat_ctx, device_param->kernel_memset);
|
||||
if (device_param->kernel_atinit) hc_clReleaseKernel (hashcat_ctx, device_param->kernel_atinit);
|
||||
|
||||
if (device_param->program) hc_clReleaseProgram (hashcat_ctx, device_param->program);
|
||||
if (device_param->program_mp) hc_clReleaseProgram (hashcat_ctx, device_param->program_mp);
|
||||
@ -5648,6 +5705,7 @@ void opencl_session_destroy (hashcat_ctx_t *hashcat_ctx)
|
||||
device_param->kernel_tm = NULL;
|
||||
device_param->kernel_amp = NULL;
|
||||
device_param->kernel_memset = NULL;
|
||||
device_param->kernel_atinit = NULL;
|
||||
device_param->program = NULL;
|
||||
device_param->program_mp = NULL;
|
||||
device_param->program_amp = NULL;
|
||||
|
Loading…
Reference in New Issue
Block a user