1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-12-01 12:28:24 +00:00

Update event handling to work around an event handling error in the NVIDIA OpenCL runtime

This commit is contained in:
jsteube 2016-02-11 09:54:50 +01:00
parent bae88174ab
commit 5c01349ba6
5 changed files with 101 additions and 30 deletions

View File

@ -99,6 +99,8 @@ typedef uint64_t u64;
typedef uint32_t uint; // we need to get rid of this sooner or later, for consistency typedef uint32_t uint; // we need to get rid of this sooner or later, for consistency
#define EXEC_CACHE 1024
#define SPEED_CACHE 128 #define SPEED_CACHE 128
#define SPEED_MAXAGE 4096 #define SPEED_MAXAGE 4096

View File

@ -61,6 +61,7 @@ typedef cl_int (*OCL_CLGETPROGRAMINFO) (cl_program, cl_program_inf
typedef cl_int (*OCL_CLGETEVENTINFO) (cl_event, cl_event_info, size_t, void *, size_t *); typedef cl_int (*OCL_CLGETEVENTINFO) (cl_event, cl_event_info, size_t, void *, size_t *);
typedef cl_int (*OCL_CLWAITFOREVENTS) (cl_uint, const cl_event *); typedef cl_int (*OCL_CLWAITFOREVENTS) (cl_uint, const cl_event *);
typedef cl_int (*OCL_CLGETEVENTPROFILINGINFO) (cl_event, cl_profiling_info, size_t, void *, size_t *); typedef cl_int (*OCL_CLGETEVENTPROFILINGINFO) (cl_event, cl_profiling_info, size_t, void *, size_t *);
typedef cl_int (*CLRELEASEEVENT) (cl_event);
typedef struct typedef struct
{ {
@ -98,6 +99,7 @@ typedef struct
OCL_CLSETKERNELARG clSetKernelArg; OCL_CLSETKERNELARG clSetKernelArg;
OCL_CLWAITFOREVENTS clWaitForEvents; OCL_CLWAITFOREVENTS clWaitForEvents;
OCL_CLGETEVENTPROFILINGINFO clGetEventProfilingInfo; OCL_CLGETEVENTPROFILINGINFO clGetEventProfilingInfo;
CLRELEASEEVENT clReleaseEvent;
} hc_opencl_lib_t; } hc_opencl_lib_t;
@ -139,5 +141,6 @@ void hc_clGetProgramInfo (OCL_PTR *ocl, cl_program program, cl_program_info para
void hc_clGetEventInfo (OCL_PTR *ocl, cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); void hc_clGetEventInfo (OCL_PTR *ocl, cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
void hc_clWaitForEvents (OCL_PTR *ocl, cl_uint num_events, const cl_event *event_list); void hc_clWaitForEvents (OCL_PTR *ocl, cl_uint num_events, const cl_event *event_list);
void hc_clGetEventProfilingInfo (OCL_PTR *ocl, cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); void hc_clGetEventProfilingInfo (OCL_PTR *ocl, cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
void hc_clReleaseEvent (OCL_PTR *ocl, cl_event event);
#endif #endif

View File

@ -860,8 +860,6 @@ struct __hc_device_param
cl_device_id device; cl_device_id device;
cl_device_type device_type; cl_device_type device_type;
cl_event event;
uint device_id; uint device_id;
uint platform_devices_id; // for mapping with hms devices uint platform_devices_id; // for mapping with hms devices
@ -924,6 +922,9 @@ struct __hc_device_param
uint innerloop_pos; uint innerloop_pos;
uint innerloop_left; uint innerloop_left;
uint exec_pos;
double exec_ms[EXEC_CACHE];
uint speed_pos; uint speed_pos;
u64 speed_cnt[SPEED_CACHE]; u64 speed_cnt[SPEED_CACHE];
float speed_ms[SPEED_CACHE]; float speed_ms[SPEED_CACHE];

View File

@ -114,6 +114,7 @@ int ocl_init (OCL_PTR *ocl)
HC_LOAD_FUNC(ocl, clSetKernelArg, OCL_CLSETKERNELARG, OpenCL, 1) HC_LOAD_FUNC(ocl, clSetKernelArg, OCL_CLSETKERNELARG, OpenCL, 1)
HC_LOAD_FUNC(ocl, clWaitForEvents, OCL_CLWAITFOREVENTS, OpenCL, 1) HC_LOAD_FUNC(ocl, clWaitForEvents, OCL_CLWAITFOREVENTS, OpenCL, 1)
HC_LOAD_FUNC(ocl, clGetEventProfilingInfo, OCL_CLGETEVENTPROFILINGINFO, OpenCL, 1) HC_LOAD_FUNC(ocl, clGetEventProfilingInfo, OCL_CLGETEVENTPROFILINGINFO, OpenCL, 1)
HC_LOAD_FUNC(ocl, clReleaseEvent, CLRELEASEEVENT, OpenCL, 1)
return 0; return 0;
} }
@ -608,3 +609,15 @@ void hc_clGetEventProfilingInfo (OCL_PTR *ocl, cl_event event, cl_profiling_info
exit (-1); exit (-1);
} }
} }
/**
 * Release an OpenCL event through the clReleaseEvent function pointer
 * stored in the OpenCL library handle.
 *
 * @param ocl    library handle whose clReleaseEvent member is invoked
 * @param event  event object passed to clReleaseEvent
 *
 * Returns normally only when the call reports CL_SUCCESS. Any other
 * status is logged together with its textual form and the process is
 * terminated with exit code -1.
 */
void hc_clReleaseEvent (OCL_PTR *ocl, cl_event event)
{
  const cl_int rc = ocl->clReleaseEvent (event);

  // Success path: nothing to report.
  if (rc == CL_SUCCESS) return;

  log_error ("ERROR: %s : %d : %s\n", "clReleaseEvent()", rc, val2cstr_cl (rc));

  exit (-1);
}

View File

@ -789,17 +789,25 @@ void status_display_automat ()
if (device_param->skipped) continue; if (device_param->skipped) continue;
if (device_param->event == NULL) continue; double exec_ms_total = 0;
cl_ulong time_start; int exec_ms_cnt = 0;
cl_ulong time_end;
hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); for (int i = 0; i < EXEC_CACHE; i++)
hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); {
double exec_ms = device_param->exec_ms[i];
const double total_time = (time_end - time_start) / 1000000.0; if (exec_ms)
{
exec_ms_total += exec_ms;
fprintf (out, "%f\t", total_time); exec_ms_cnt++;
}
}
exec_ms_total /= exec_ms_cnt;
fprintf (out, "%f\t", exec_ms_total);
} }
/** /**
@ -1190,7 +1198,7 @@ void status_display ()
* exec time * exec time
*/ */
double exec_runtime_ms[DEVICES_MAX] = { 0 }; double exec_all_ms[DEVICES_MAX] = { 0 };
for (uint device_id = 0; device_id < data.devices_cnt; device_id++) for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
{ {
@ -1198,17 +1206,25 @@ void status_display ()
if (device_param->skipped) continue; if (device_param->skipped) continue;
if (device_param->event == NULL) continue; double exec_ms_total = 0;
cl_ulong time_start; int exec_ms_cnt = 0;
cl_ulong time_end;
hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); for (int i = 0; i < EXEC_CACHE; i++)
hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); {
double exec_ms = device_param->exec_ms[i];
const double total_time = (time_end - time_start) / 1000000.0; if (exec_ms)
{
exec_ms_total += exec_ms;
exec_runtime_ms[device_id] = total_time; exec_ms_cnt++;
}
}
exec_ms_total /= exec_ms_cnt;
exec_all_ms[device_id] = exec_ms_total;
} }
/** /**
@ -1429,7 +1445,7 @@ void status_display ()
format_speed_display (hashes_dev_ms[device_id] * 1000, display_dev_cur, sizeof (display_dev_cur)); format_speed_display (hashes_dev_ms[device_id] * 1000, display_dev_cur, sizeof (display_dev_cur));
log_info ("Speed.Dev.#%d...: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_runtime_ms[device_id]); log_info ("Speed.Dev.#%d...: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_all_ms[device_id]);
} }
char display_all_cur[16] = { 0 }; char display_all_cur[16] = { 0 };
@ -1679,7 +1695,7 @@ static void status_benchmark ()
* exec time * exec time
*/ */
double exec_runtime_ms[DEVICES_MAX] = { 0 }; double exec_all_ms[DEVICES_MAX] = { 0 };
for (uint device_id = 0; device_id < data.devices_cnt; device_id++) for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
{ {
@ -1687,17 +1703,25 @@ static void status_benchmark ()
if (device_param->skipped) continue; if (device_param->skipped) continue;
if (device_param->event == NULL) continue; double exec_ms_total = 0;
cl_ulong time_start; int exec_ms_cnt = 0;
cl_ulong time_end;
hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); for (int i = 0; i < EXEC_CACHE; i++)
hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); {
double exec_ms = device_param->exec_ms[i];
const double total_time = (time_end - time_start) / 1000000.0; if (exec_ms)
{
exec_ms_total += exec_ms;
exec_runtime_ms[device_id] = total_time; exec_ms_cnt++;
}
}
exec_ms_total /= exec_ms_cnt;
exec_all_ms[device_id] = exec_ms_total;
} }
for (uint device_id = 0; device_id < data.devices_cnt; device_id++) for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
@ -1712,7 +1736,7 @@ static void status_benchmark ()
format_speed_display (hashes_dev_ms[device_id] * 1000, display_dev_cur, sizeof (display_dev_cur)); format_speed_display (hashes_dev_ms[device_id] * 1000, display_dev_cur, sizeof (display_dev_cur));
log_info ("Speed.Dev.#%d.: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_runtime_ms[device_id]); log_info ("Speed.Dev.#%d.: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_all_ms[device_id]);
} }
char display_all_cur[16] = { 0 }; char display_all_cur[16] = { 0 };
@ -2470,11 +2494,35 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co
hc_clFlush (data.ocl, device_param->command_queue); hc_clFlush (data.ocl, device_param->command_queue);
//hc_clFinish (data.ocl, device_param->command_queue);
hc_clWaitForEvents (data.ocl, 1, &event); hc_clWaitForEvents (data.ocl, 1, &event);
if (event_update) device_param->event = event; if (event_update)
{
cl_ulong time_start;
cl_ulong time_end;
hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
const double exec_time = (time_end - time_start) / 1000000.0;
uint exec_pos = device_param->exec_pos;
device_param->exec_ms[exec_pos] = exec_time;
exec_pos++;
if (exec_pos == EXEC_CACHE)
{
exec_pos = 0;
}
device_param->exec_pos = exec_pos;
}
hc_clReleaseEvent (data.ocl, event);
hc_clFinish (data.ocl, device_param->command_queue);
} }
static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num) static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num)
@ -15585,6 +15633,10 @@ int main (int argc, char **argv)
memset (device_param->speed_ms, 0, SPEED_CACHE * sizeof (float)); memset (device_param->speed_ms, 0, SPEED_CACHE * sizeof (float));
memset (device_param->speed_rec, 0, SPEED_CACHE * sizeof (hc_timer_t)); memset (device_param->speed_rec, 0, SPEED_CACHE * sizeof (hc_timer_t));
device_param->exec_pos = 0;
memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double));
device_param->kernel_power = device_param->kernel_power_user; device_param->kernel_power = device_param->kernel_power_user;
device_param->kernel_blocks = device_param->kernel_blocks_user; device_param->kernel_blocks = device_param->kernel_blocks_user;