From 5c01349ba63bd4496c9acb32991b1870d05a37a1 Mon Sep 17 00:00:00 2001
From: jsteube
Date: Thu, 11 Feb 2016 09:54:50 +0100
Subject: [PATCH] Update event handling to workaround event handling error in nvidia opencl runtime

Kernel profiling data is now read from the event inside run_kernel ()
directly after hc_clWaitForEvents (), and the event is released right
away instead of being kept in device_param->event. Each measured time is
stored in a per-device ring buffer (exec_ms[EXEC_CACHE], write position
exec_pos). status_display_automat (), status_display () and
status_benchmark () report the average of all non-zero entries.

Review notes:
- The average is only divided by the sample count if at least one sample
  was recorded; otherwise the reported time stays 0 instead of becoming
  0/0 (NaN) before the first timed kernel run.
- The new function pointer typedef is named OCL_CLRELEASEEVENT so that it
  follows the OCL_ prefix of the other typedefs in ext_OpenCL.h.
- Indentation of this patch was restored from a whitespace-collapsed copy
  and may need --ignore-whitespace when applying.
---
 include/common.h     |   2 +
 include/ext_OpenCL.h |   3 ++
 include/types.h      |   5 +-
 src/ext_OpenCL.c     |  13 ++++++
 src/oclHashcat.c     | 108 ++++++++++++++++++++++++++++++++-----------
 5 files changed, 101 insertions(+), 30 deletions(-)

diff --git a/include/common.h b/include/common.h
index de176b614..6c221026e 100644
--- a/include/common.h
+++ b/include/common.h
@@ -99,6 +99,8 @@ typedef uint64_t u64;
 
 typedef uint32_t uint; // we need to get rid of this sooner or later, for consistency
 
+#define EXEC_CACHE 1024
+
 #define SPEED_CACHE 128
 #define SPEED_MAXAGE 4096
 
diff --git a/include/ext_OpenCL.h b/include/ext_OpenCL.h
index a0c37c3ef..926fc335c 100644
--- a/include/ext_OpenCL.h
+++ b/include/ext_OpenCL.h
@@ -61,6 +61,7 @@ typedef cl_int (*OCL_CLGETPROGRAMINFO) (cl_program, cl_program_inf
 typedef cl_int (*OCL_CLGETEVENTINFO) (cl_event, cl_event_info, size_t, void *, size_t *);
 typedef cl_int (*OCL_CLWAITFOREVENTS) (cl_uint, const cl_event *);
 typedef cl_int (*OCL_CLGETEVENTPROFILINGINFO) (cl_event, cl_profiling_info, size_t, void *, size_t *);
+typedef cl_int (*OCL_CLRELEASEEVENT) (cl_event);
 
 typedef struct
 {
@@ -98,6 +99,7 @@ typedef struct
   OCL_CLSETKERNELARG clSetKernelArg;
   OCL_CLWAITFOREVENTS clWaitForEvents;
   OCL_CLGETEVENTPROFILINGINFO clGetEventProfilingInfo;
+  OCL_CLRELEASEEVENT clReleaseEvent;
 
 } hc_opencl_lib_t;
 
@@ -139,5 +141,6 @@ void hc_clGetProgramInfo (OCL_PTR *ocl, cl_program program, cl_program_info para
 void hc_clGetEventInfo (OCL_PTR *ocl, cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
 void hc_clWaitForEvents (OCL_PTR *ocl, cl_uint num_events, const cl_event *event_list);
 void hc_clGetEventProfilingInfo (OCL_PTR *ocl, cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
+void hc_clReleaseEvent (OCL_PTR *ocl, cl_event event);
 
 #endif
diff --git a/include/types.h b/include/types.h
index 5ce1d049f..7ff5b95c5 100644
--- a/include/types.h
+++ b/include/types.h
@@ -860,8 +860,6 @@ struct __hc_device_param
   cl_device_id device;
   cl_device_type device_type;
 
-  cl_event event;
-
   uint device_id;
   uint platform_devices_id; // for mapping with hms devices
 
@@ -924,6 +922,9 @@ struct __hc_device_param
   uint innerloop_pos;
   uint innerloop_left;
 
+  uint exec_pos;
+  double exec_ms[EXEC_CACHE];
+
   uint speed_pos;
   u64 speed_cnt[SPEED_CACHE];
   float speed_ms[SPEED_CACHE];
diff --git a/src/ext_OpenCL.c b/src/ext_OpenCL.c
index c982c801a..60b737816 100644
--- a/src/ext_OpenCL.c
+++ b/src/ext_OpenCL.c
@@ -114,6 +114,7 @@ int ocl_init (OCL_PTR *ocl)
   HC_LOAD_FUNC(ocl, clSetKernelArg, OCL_CLSETKERNELARG, OpenCL, 1)
   HC_LOAD_FUNC(ocl, clWaitForEvents, OCL_CLWAITFOREVENTS, OpenCL, 1)
   HC_LOAD_FUNC(ocl, clGetEventProfilingInfo, OCL_CLGETEVENTPROFILINGINFO, OpenCL, 1)
+  HC_LOAD_FUNC(ocl, clReleaseEvent, OCL_CLRELEASEEVENT, OpenCL, 1)
 
   return 0;
 }
@@ -608,3 +609,15 @@ void hc_clGetEventProfilingInfo (OCL_PTR *ocl, cl_event event, cl_profiling_info
     exit (-1);
   }
 }
+
+void hc_clReleaseEvent (OCL_PTR *ocl, cl_event event)
+{
+  cl_int CL_err = ocl->clReleaseEvent (event);
+
+  if (CL_err != CL_SUCCESS)
+  {
+    log_error ("ERROR: %s : %d : %s\n", "clReleaseEvent()", CL_err, val2cstr_cl (CL_err));
+
+    exit (-1);
+  }
+}
diff --git a/src/oclHashcat.c b/src/oclHashcat.c
index 3446559e4..90976ee6e 100644
--- a/src/oclHashcat.c
+++ b/src/oclHashcat.c
@@ -789,17 +789,25 @@ void status_display_automat ()
 
     if (device_param->skipped) continue;
 
-    if (device_param->event == NULL) continue;
+    double exec_ms_total = 0;
 
-    cl_ulong time_start;
-    cl_ulong time_end;
+    int exec_ms_cnt = 0;
 
-    hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
-    hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
+    for (int i = 0; i < EXEC_CACHE; i++)
+    {
+      double exec_ms = device_param->exec_ms[i];
 
-    const double total_time = (time_end - time_start) / 1000000.0;
+      if (exec_ms)
+      {
+        exec_ms_total += exec_ms;
 
-    fprintf (out, "%f\t", total_time);
+        exec_ms_cnt++;
+      }
+    }
+
+    if (exec_ms_cnt > 0) exec_ms_total /= exec_ms_cnt; // no sample yet: keep 0 instead of 0/0 (NaN)
+
+    fprintf (out, "%f\t", exec_ms_total);
   }
 
   /**
@@ -1190,7 +1198,7 @@ void status_display ()
    * exec time
    */
 
-  double exec_runtime_ms[DEVICES_MAX] = { 0 };
+  double exec_all_ms[DEVICES_MAX] = { 0 };
 
   for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
   {
@@ -1198,17 +1206,25 @@ void status_display ()
 
     if (device_param->skipped) continue;
 
-    if (device_param->event == NULL) continue;
+    double exec_ms_total = 0;
 
-    cl_ulong time_start;
-    cl_ulong time_end;
+    int exec_ms_cnt = 0;
 
-    hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
-    hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
+    for (int i = 0; i < EXEC_CACHE; i++)
+    {
+      double exec_ms = device_param->exec_ms[i];
 
-    const double total_time = (time_end - time_start) / 1000000.0;
+      if (exec_ms)
+      {
+        exec_ms_total += exec_ms;
 
-    exec_runtime_ms[device_id] = total_time;
+        exec_ms_cnt++;
+      }
+    }
+
+    if (exec_ms_cnt > 0) exec_ms_total /= exec_ms_cnt; // no sample yet: keep 0 instead of 0/0 (NaN)
+
+    exec_all_ms[device_id] = exec_ms_total;
   }
 
   /**
@@ -1429,7 +1445,7 @@ void status_display ()
 
     format_speed_display (hashes_dev_ms[device_id] * 1000, display_dev_cur, sizeof (display_dev_cur));
 
-    log_info ("Speed.Dev.#%d...: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_runtime_ms[device_id]);
+    log_info ("Speed.Dev.#%d...: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_all_ms[device_id]);
   }
 
   char display_all_cur[16] = { 0 };
@@ -1679,7 +1695,7 @@ static void status_benchmark ()
    * exec time
    */
 
-  double exec_runtime_ms[DEVICES_MAX] = { 0 };
+  double exec_all_ms[DEVICES_MAX] = { 0 };
 
   for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
   {
@@ -1687,17 +1703,25 @@ static void status_benchmark ()
 
     if (device_param->skipped) continue;
 
-    if (device_param->event == NULL) continue;
+    double exec_ms_total = 0;
 
-    cl_ulong time_start;
-    cl_ulong time_end;
+    int exec_ms_cnt = 0;
 
-    hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
-    hc_clGetEventProfilingInfo (data.ocl, device_param->event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
+    for (int i = 0; i < EXEC_CACHE; i++)
+    {
+      double exec_ms = device_param->exec_ms[i];
 
-    const double total_time = (time_end - time_start) / 1000000.0;
+      if (exec_ms)
+      {
+        exec_ms_total += exec_ms;
 
-    exec_runtime_ms[device_id] = total_time;
+        exec_ms_cnt++;
+      }
+    }
+
+    if (exec_ms_cnt > 0) exec_ms_total /= exec_ms_cnt; // no sample yet: keep 0 instead of 0/0 (NaN)
+
+    exec_all_ms[device_id] = exec_ms_total;
   }
 
   for (uint device_id = 0; device_id < data.devices_cnt; device_id++)
@@ -1712,7 +1736,7 @@ static void status_benchmark ()
 
     format_speed_display (hashes_dev_ms[device_id] * 1000, display_dev_cur, sizeof (display_dev_cur));
 
-    log_info ("Speed.Dev.#%d.: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_runtime_ms[device_id]);
+    log_info ("Speed.Dev.#%d.: %9sH/s (%0.2fms)", device_id + 1, display_dev_cur, exec_all_ms[device_id]);
   }
 
   char display_all_cur[16] = { 0 };
@@ -2470,11 +2494,35 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co
 
   hc_clFlush (data.ocl, device_param->command_queue);
 
-  //hc_clFinish (data.ocl, device_param->command_queue);
-
   hc_clWaitForEvents (data.ocl, 1, &event);
 
-  if (event_update) device_param->event = event;
+  if (event_update)
+  {
+    cl_ulong time_start;
+    cl_ulong time_end;
+
+    hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
+    hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL);
+
+    const double exec_time = (time_end - time_start) / 1000000.0;
+
+    uint exec_pos = device_param->exec_pos;
+
+    device_param->exec_ms[exec_pos] = exec_time;
+
+    exec_pos++;
+
+    if (exec_pos == EXEC_CACHE)
+    {
+      exec_pos = 0;
+    }
+
+    device_param->exec_pos = exec_pos;
+  }
+
+  hc_clReleaseEvent (data.ocl, event);
+
+  hc_clFinish (data.ocl, device_param->command_queue);
 }
 
 static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num)
@@ -15585,6 +15633,10 @@ int main (int argc, char **argv)
         memset (device_param->speed_ms, 0, SPEED_CACHE * sizeof (float));
         memset (device_param->speed_rec, 0, SPEED_CACHE * sizeof (hc_timer_t));
 
+        device_param->exec_pos = 0;
+
+        memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double));
+
         device_param->kernel_power = device_param->kernel_power_user;
         device_param->kernel_blocks = device_param->kernel_blocks_user;
 