1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-03-04 09:06:05 +00:00

Prepare CUDA events

This commit is contained in:
Jens Steube 2019-05-04 10:44:03 +02:00
parent f2948460c9
commit 4df00033d7
3 changed files with 559 additions and 190 deletions

View File

@@ -42,27 +42,33 @@ int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog,
int hc_cuCtxCreate (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev);
int hc_cuCtxDestroy (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx);
int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx);
int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev);
int hc_cuDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count);
int hc_cuDeviceGet (hashcat_ctx_t *hashcat_ctx, CUdevice *device, int ordinal);
int hc_cuDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev);
int hc_cuDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev);
int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
int hc_cuEventCreate (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int Flags);
int hc_cuEventDestroy (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
int hc_cuEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd);
int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream);
int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent);
int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
int hc_cuMemAlloc (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
int hc_cuMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
int hc_cuMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr);
int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc);
int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags);
int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
int hc_cuStreamSynchronize (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx);
int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);

View File

@@ -1206,7 +1206,6 @@ typedef struct hc_device_param
double spin_damp;
void *kernel_params[PARAMCNT];
void *kernel_params_mp[PARAMCNT];
void *kernel_params_mp_r[PARAMCNT];

View File

@@ -1599,6 +1599,168 @@ int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx)
return 0;
}
// Checked wrapper around the CUDA driver API's cuEventCreate: forwards the
// call through the dynamically resolved function-pointer table and logs any
// failure via the hashcat event system. Returns 0 on success, -1 on error.
int hc_cuEventCreate (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int Flags)
{
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = backend_ctx->cuda;

  const CUresult rc = cuda->cuEventCreate (phEvent, Flags);

  if (rc == CUDA_SUCCESS) return 0;

  // Prefer the driver's human-readable message; fall back to the raw code.
  const char *err_str = NULL;

  if (cuda->cuGetErrorString (rc, &err_str) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuEventCreate(): %s", err_str);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuEventCreate(): %d", rc);
  }

  return -1;
}
// Checked wrapper around the CUDA driver API's cuEventDestroy: forwards the
// call through the dynamically resolved function-pointer table and logs any
// failure via the hashcat event system. Returns 0 on success, -1 on error.
int hc_cuEventDestroy (hashcat_ctx_t *hashcat_ctx, CUevent hEvent)
{
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = backend_ctx->cuda;

  const CUresult rc = cuda->cuEventDestroy (hEvent);

  if (rc == CUDA_SUCCESS) return 0;

  // Prefer the driver's human-readable message; fall back to the raw code.
  const char *err_str = NULL;

  if (cuda->cuGetErrorString (rc, &err_str) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuEventDestroy(): %s", err_str);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuEventDestroy(): %d", rc);
  }

  return -1;
}
// Checked wrapper around the CUDA driver API's cuEventElapsedTime: reports
// the time between two recorded events (in milliseconds, via *pMilliseconds)
// and logs any failure via the hashcat event system.
// Returns 0 on success, -1 on error.
int hc_cuEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd)
{
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = backend_ctx->cuda;

  const CUresult rc = cuda->cuEventElapsedTime (pMilliseconds, hStart, hEnd);

  if (rc == CUDA_SUCCESS) return 0;

  // Prefer the driver's human-readable message; fall back to the raw code.
  const char *err_str = NULL;

  if (cuda->cuGetErrorString (rc, &err_str) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuEventElapsedTime(): %s", err_str);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuEventElapsedTime(): %d", rc);
  }

  return -1;
}
// Checked wrapper around the CUDA driver API's cuEventQuery: polls the
// completion status of an event and logs any non-success result via the
// hashcat event system. Returns 0 on success, -1 on error.
// NOTE(review): cuEventQuery returns CUDA_ERROR_NOT_READY while work is
// still pending, which this wrapper treats as an error — callers presumably
// rely on that; confirm before reusing for non-blocking polling.
int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent)
{
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = backend_ctx->cuda;

  const CUresult rc = cuda->cuEventQuery (hEvent);

  if (rc == CUDA_SUCCESS) return 0;

  // Prefer the driver's human-readable message; fall back to the raw code.
  const char *err_str = NULL;

  if (cuda->cuGetErrorString (rc, &err_str) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuEventQuery(): %s", err_str);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuEventQuery(): %d", rc);
  }

  return -1;
}
// Checked wrapper around the CUDA driver API's cuEventRecord: records the
// event into the given stream and logs any failure via the hashcat event
// system. Returns 0 on success, -1 on error.
int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream)
{
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = backend_ctx->cuda;

  const CUresult rc = cuda->cuEventRecord (hEvent, hStream);

  if (rc == CUDA_SUCCESS) return 0;

  // Prefer the driver's human-readable message; fall back to the raw code.
  const char *err_str = NULL;

  if (cuda->cuGetErrorString (rc, &err_str) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuEventRecord(): %s", err_str);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuEventRecord(): %d", rc);
  }

  return -1;
}
// Checked wrapper around the CUDA driver API's cuEventSynchronize: blocks
// until the event has completed and logs any failure via the hashcat event
// system. Returns 0 on success, -1 on error.
int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent)
{
  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;

  CUDA_PTR *cuda = backend_ctx->cuda;

  const CUresult rc = cuda->cuEventSynchronize (hEvent);

  if (rc == CUDA_SUCCESS) return 0;

  // Prefer the driver's human-readable message; fall back to the raw code.
  const char *err_str = NULL;

  if (cuda->cuGetErrorString (rc, &err_str) == CUDA_SUCCESS)
  {
    event_log_error (hashcat_ctx, "cuEventSynchronize(): %s", err_str);
  }
  else
  {
    event_log_error (hashcat_ctx, "cuEventSynchronize(): %d", rc);
  }

  return -1;
}
// OpenCL
int ocl_init (hashcat_ctx_t *hashcat_ctx)
@@ -2624,100 +2786,72 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
device_param->kernel_params_buf64[34] = num;
u64 kernel_threads = 0;
cl_kernel kernel = NULL;
switch (kern_run)
{
case KERN_RUN_1:
kernel = device_param->opencl_kernel1;
kernel_threads = device_param->kernel_wgs1;
break;
case KERN_RUN_12:
kernel = device_param->opencl_kernel12;
kernel_threads = device_param->kernel_wgs12;
break;
case KERN_RUN_2:
kernel = device_param->opencl_kernel2;
kernel_threads = device_param->kernel_wgs2;
break;
case KERN_RUN_23:
kernel = device_param->opencl_kernel23;
kernel_threads = device_param->kernel_wgs23;
break;
case KERN_RUN_3:
kernel = device_param->opencl_kernel3;
kernel_threads = device_param->kernel_wgs3;
break;
case KERN_RUN_4:
kernel = device_param->opencl_kernel4;
kernel_threads = device_param->kernel_wgs4;
break;
case KERN_RUN_INIT2:
kernel = device_param->opencl_kernel_init2;
kernel_threads = device_param->kernel_wgs_init2;
break;
case KERN_RUN_LOOP2:
kernel = device_param->opencl_kernel_loop2;
kernel_threads = device_param->kernel_wgs_loop2;
break;
case KERN_RUN_AUX1:
kernel = device_param->opencl_kernel_aux1;
kernel_threads = device_param->kernel_wgs_aux1;
break;
case KERN_RUN_AUX2:
kernel = device_param->opencl_kernel_aux2;
kernel_threads = device_param->kernel_wgs_aux2;
break;
case KERN_RUN_AUX3:
kernel = device_param->opencl_kernel_aux3;
kernel_threads = device_param->kernel_wgs_aux3;
break;
case KERN_RUN_AUX4:
kernel = device_param->opencl_kernel_aux4;
kernel_threads = device_param->kernel_wgs_aux4;
break;
default:
event_log_error (hashcat_ctx, "Invalid kernel specified.");
return -1;
case KERN_RUN_1: kernel_threads = device_param->kernel_wgs1; break;
case KERN_RUN_12: kernel_threads = device_param->kernel_wgs12; break;
case KERN_RUN_2: kernel_threads = device_param->kernel_wgs2; break;
case KERN_RUN_23: kernel_threads = device_param->kernel_wgs23; break;
case KERN_RUN_3: kernel_threads = device_param->kernel_wgs3; break;
case KERN_RUN_4: kernel_threads = device_param->kernel_wgs4; break;
case KERN_RUN_INIT2: kernel_threads = device_param->kernel_wgs_init2; break;
case KERN_RUN_LOOP2: kernel_threads = device_param->kernel_wgs_loop2; break;
case KERN_RUN_AUX1: kernel_threads = device_param->kernel_wgs_aux1; break;
case KERN_RUN_AUX2: kernel_threads = device_param->kernel_wgs_aux2; break;
case KERN_RUN_AUX3: kernel_threads = device_param->kernel_wgs_aux3; break;
case KERN_RUN_AUX4: kernel_threads = device_param->kernel_wgs_aux4; break;
}
kernel_threads = MIN (kernel_threads, device_param->kernel_threads);
// kernel_threads = power_of_two_floor_32 (kernel_threads);
if (device_param->is_cuda == true)
{
num_elements = CEILDIV (num_elements, kernel_threads);
}
if (device_param->is_opencl == true)
{
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
}
int CL_rc;
int CU_rc;
for (u32 i = 0; i <= 23; i++)
if (device_param->is_cuda == true)
{
CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, i, sizeof (cl_mem), device_param->kernel_params[i]);
CUfunction cuda_function = NULL;
if (CL_rc == -1) return -1;
if (device_param->is_cuda == true)
{
switch (kern_run)
{
case KERN_RUN_1: cuda_function = device_param->cuda_function1; break;
case KERN_RUN_12: cuda_function = device_param->cuda_function12; break;
case KERN_RUN_2: cuda_function = device_param->cuda_function2; break;
case KERN_RUN_23: cuda_function = device_param->cuda_function23; break;
case KERN_RUN_3: cuda_function = device_param->cuda_function3; break;
case KERN_RUN_4: cuda_function = device_param->cuda_function4; break;
case KERN_RUN_INIT2: cuda_function = device_param->cuda_function_init2; break;
case KERN_RUN_LOOP2: cuda_function = device_param->cuda_function_loop2; break;
case KERN_RUN_AUX1: cuda_function = device_param->cuda_function_aux1; break;
case KERN_RUN_AUX2: cuda_function = device_param->cuda_function_aux2; break;
case KERN_RUN_AUX3: cuda_function = device_param->cuda_function_aux3; break;
case KERN_RUN_AUX4: cuda_function = device_param->cuda_function_aux4; break;
}
}
for (u32 i = 24; i <= 33; i++)
{
CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, i, sizeof (cl_uint), device_param->kernel_params[i]);
if (CL_rc == -1) return -1;
}
for (u32 i = 34; i <= 34; i++)
{
CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, i, sizeof (cl_ulong), device_param->kernel_params[i]);
if (CL_rc == -1) return -1;
}
cl_event event;
CUevent cuda_event;
/*
if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
{
const size_t global_work_size[3] = { num_elements, 32, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
if (CL_rc == -1) return -1;
}
@@ -2750,7 +2884,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event);
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
if (CL_rc == -1) return -1;
}
@@ -2763,11 +2897,11 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
const u32 iterationm = iteration % EXPECTED_ITERATIONS;
cl_int event_status;
cl_int opencl_event_status;
size_t param_value_size_ret;
CL_rc = hc_clGetEventInfo (hashcat_ctx, event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (event_status), &event_status, &param_value_size_ret);
CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
if (CL_rc == -1) return -1;
@@ -2775,7 +2909,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
double spin_total = device_param->spin_damp;
while (event_status != CL_COMPLETE)
while (opencl_event_status != CL_COMPLETE)
{
if (status_ctx->devices_status == STATUS_RUNNING)
{
@@ -2800,7 +2934,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
sleep (0);
}
CL_rc = hc_clGetEventInfo (hashcat_ctx, event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (event_status), &event_status, &param_value_size_ret);
CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
if (CL_rc == -1) return -1;
@@ -2810,15 +2944,15 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
}
}
CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &event);
CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event);
if (CL_rc == -1) return -1;
cl_ulong time_start;
cl_ulong time_end;
CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1;
CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); if (CL_rc == -1) return -1;
CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1;
CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); if (CL_rc == -1) return -1;
const double exec_us = (double) (time_end - time_start) / 1000;
@@ -2858,13 +2992,216 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
device_param->exec_pos = exec_pos;
}
CL_rc = hc_clReleaseEvent (hashcat_ctx, event);
CL_rc = hc_clReleaseEvent (hashcat_ctx, opencl_event);
if (CL_rc == -1) return -1;
CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
if (CL_rc == -1) return -1;
*/
}
if (device_param->is_opencl == true)
{
cl_kernel opencl_kernel = NULL;
if (device_param->is_opencl == true)
{
switch (kern_run)
{
case KERN_RUN_1: opencl_kernel = device_param->opencl_kernel1; break;
case KERN_RUN_12: opencl_kernel = device_param->opencl_kernel12; break;
case KERN_RUN_2: opencl_kernel = device_param->opencl_kernel2; break;
case KERN_RUN_23: opencl_kernel = device_param->opencl_kernel23; break;
case KERN_RUN_3: opencl_kernel = device_param->opencl_kernel3; break;
case KERN_RUN_4: opencl_kernel = device_param->opencl_kernel4; break;
case KERN_RUN_INIT2: opencl_kernel = device_param->opencl_kernel_init2; break;
case KERN_RUN_LOOP2: opencl_kernel = device_param->opencl_kernel_loop2; break;
case KERN_RUN_AUX1: opencl_kernel = device_param->opencl_kernel_aux1; break;
case KERN_RUN_AUX2: opencl_kernel = device_param->opencl_kernel_aux2; break;
case KERN_RUN_AUX3: opencl_kernel = device_param->opencl_kernel_aux3; break;
case KERN_RUN_AUX4: opencl_kernel = device_param->opencl_kernel_aux4; break;
}
}
for (u32 i = 0; i <= 23; i++)
{
CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_mem), device_param->kernel_params[i]);
if (CL_rc == -1) return -1;
}
for (u32 i = 24; i <= 33; i++)
{
CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_uint), device_param->kernel_params[i]);
if (CL_rc == -1) return -1;
}
for (u32 i = 34; i <= 34; i++)
{
CL_rc = hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_ulong), device_param->kernel_params[i]);
if (CL_rc == -1) return -1;
}
cl_event opencl_event;
if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
{
const size_t global_work_size[3] = { num_elements, 32, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
if (CL_rc == -1) return -1;
}
else
{
if (kern_run == KERN_RUN_1)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
}
else if (kern_run == KERN_RUN_2)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
}
else if (kern_run == KERN_RUN_3)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP)
{
num_elements = CEILDIV (num_elements, device_param->vector_width);
}
}
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
CL_rc = hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event);
if (CL_rc == -1) return -1;
}
CL_rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
if (CL_rc == -1) return -1;
// spin damper section
const u32 iterationm = iteration % EXPECTED_ITERATIONS;
cl_int opencl_event_status;
size_t param_value_size_ret;
CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
if (CL_rc == -1) return -1;
if (device_param->spin_damp > 0)
{
double spin_total = device_param->spin_damp;
while (opencl_event_status != CL_COMPLETE)
{
if (status_ctx->devices_status == STATUS_RUNNING)
{
switch (kern_run)
{
case KERN_RUN_1: if (device_param->exec_us_prev1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_2: if (device_param->exec_us_prev2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_3: if (device_param->exec_us_prev3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_4: if (device_param->exec_us_prev4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_INIT2: if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_LOOP2: if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX1: if (device_param->exec_us_prev_aux1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX2: if (device_param->exec_us_prev_aux2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX3: if (device_param->exec_us_prev_aux3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX4: if (device_param->exec_us_prev_aux4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm] * device_param->spin_damp)); break;
}
}
else
{
// we were told to be nice
sleep (0);
}
CL_rc = hc_clGetEventInfo (hashcat_ctx, opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof (opencl_event_status), &opencl_event_status, &param_value_size_ret);
if (CL_rc == -1) return -1;
spin_total += device_param->spin_damp;
if (spin_total > 1) break;
}
}
CL_rc = hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event);
if (CL_rc == -1) return -1;
cl_ulong time_start;
cl_ulong time_end;
CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); if (CL_rc == -1) return -1;
CL_rc = hc_clGetEventProfilingInfo (hashcat_ctx, opencl_event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); if (CL_rc == -1) return -1;
const double exec_us = (double) (time_end - time_start) / 1000;
if (device_param->spin_damp > 0)
{
if (status_ctx->devices_status == STATUS_RUNNING)
{
switch (kern_run)
{
case KERN_RUN_1: device_param->exec_us_prev1[iterationm] = exec_us; break;
case KERN_RUN_2: device_param->exec_us_prev2[iterationm] = exec_us; break;
case KERN_RUN_3: device_param->exec_us_prev3[iterationm] = exec_us; break;
case KERN_RUN_4: device_param->exec_us_prev4[iterationm] = exec_us; break;
case KERN_RUN_INIT2: device_param->exec_us_prev_init2[iterationm] = exec_us; break;
case KERN_RUN_LOOP2: device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
case KERN_RUN_AUX1: device_param->exec_us_prev_aux1[iterationm] = exec_us; break;
case KERN_RUN_AUX2: device_param->exec_us_prev_aux2[iterationm] = exec_us; break;
case KERN_RUN_AUX3: device_param->exec_us_prev_aux3[iterationm] = exec_us; break;
case KERN_RUN_AUX4: device_param->exec_us_prev_aux4[iterationm] = exec_us; break;
}
}
}
if (event_update)
{
u32 exec_pos = device_param->exec_pos;
device_param->exec_msec[exec_pos] = exec_us / 1000;
exec_pos++;
if (exec_pos == EXEC_CACHE)
{
exec_pos = 0;
}
device_param->exec_pos = exec_pos;
}
CL_rc = hc_clReleaseEvent (hashcat_ctx, opencl_event);
if (CL_rc == -1) return -1;
CL_rc = hc_clFinish (hashcat_ctx, device_param->opencl_command_queue);
if (CL_rc == -1) return -1;
}
return 0;
}
@@ -9705,6 +10042,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (device_param->is_cuda == true)
{
device_param->kernel_params[ 0] = &device_param->cuda_d_pws_buf;
device_param->kernel_params[ 4] = &device_param->cuda_d_tmps;
device_param->kernel_params[ 5] = &device_param->cuda_d_hooks;
}
if (device_param->is_opencl == true)
@@ -9729,6 +10069,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (device_param->is_cuda == true)
{
device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
? &device_param->cuda_d_pws_buf
: &device_param->cuda_d_pws_amp_buf;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1;
}
if (device_param->is_opencl == true)
@@ -9746,6 +10091,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (device_param->is_cuda == true)
{
device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
? &device_param->cuda_d_pws_buf
: &device_param->cuda_d_pws_amp_buf;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1;
}
if (device_param->is_opencl == true)
@@ -9766,6 +10116,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (device_param->is_cuda == true)
{
device_param->kernel_params_amp[0] = &device_param->cuda_d_pws_buf;
device_param->kernel_params_amp[1] = &device_param->cuda_d_pws_amp_buf;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1;
}
if (device_param->is_opencl == true)
@@ -9781,6 +10136,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (device_param->is_cuda == true)
{
device_param->kernel_params_decompress[0] = &device_param->cuda_d_pws_idx;
device_param->kernel_params_decompress[1] = &device_param->cuda_d_pws_comp_buf;
device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
? &device_param->cuda_d_pws_buf
: &device_param->cuda_d_pws_amp_buf;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1;
}
if (device_param->is_opencl == true)