Improve autotune and synchronize results on same OpenCL devices

pull/1511/head
Jens Steube 6 years ago
parent 3d2c0018fc
commit fb00b528c4

@ -81,6 +81,7 @@ void opencl_ctx_destroy (hashcat_ctx_t *hashcat_ctx);
int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime);
void opencl_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx);
void opencl_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx);
void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx);
void opencl_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx);

@ -152,8 +152,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
}
}
#define VERIFIER_CNT 1
// first find out highest kernel-loops that stays below target_msec
if (kernel_loops_min < kernel_loops_max)
@ -164,20 +162,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops);
for (int i = 0; i < VERIFIER_CNT; i++)
{
double exec_msec_v = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops);
exec_msec = MIN (exec_msec, exec_msec_v);
}
if (exec_msec < target_msec) break;
}
}
// now the same for kernel-accel but with the new kernel-loops from previous loop set
#define STEPS_CNT 10
#define STEPS_CNT 16
if (kernel_accel_min < kernel_accel_max)
{
@ -190,13 +181,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops);
for (int verifier_idx = 0; verifier_idx < VERIFIER_CNT; verifier_idx++)
{
double exec_msec_v = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops);
exec_msec = MIN (exec_msec, exec_msec_v);
}
if (exec_msec > target_msec) break;
kernel_accel = kernel_accel_try;
@ -213,13 +197,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
double exec_msec_pre_final = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
for (int verifier_idx = 0; verifier_idx < VERIFIER_CNT; verifier_idx++)
{
double exec_msec_pre_final_v = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
exec_msec_pre_final = MIN (exec_msec_pre_final, exec_msec_pre_final_v);
}
u32 diff = kernel_loops - kernel_accel;
if ((kernel_loops_min < kernel_loops_max) && (kernel_accel_min < kernel_accel_max))
@ -235,26 +212,25 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (kernel_accel_try > kernel_accel_max) break;
if (kernel_loops_try < kernel_loops_min) break;
u32 diff_new = kernel_loops_try - kernel_accel_try;
u32 diff_new = 0;
if (kernel_accel_try > kernel_loops_try)
{
diff_new = kernel_accel_try - kernel_loops_try;
}
else
{
diff_new = kernel_loops_try - kernel_accel_try;
}
if (diff_new > diff) break;
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try);
for (int verifier_idx = 0; verifier_idx < VERIFIER_CNT; verifier_idx++)
{
double exec_msec_v = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try);
exec_msec = MIN (exec_msec, exec_msec_v);
}
for (int verifier_idx = 0; verifier_idx < VERIFIER_CNT; verifier_idx++)
{
exec_msec_pre_final = exec_msec;
exec_msec_pre_final = exec_msec;
kernel_accel = kernel_accel_try;
kernel_loops = kernel_loops_try;
}
kernel_accel = kernel_accel_try;
kernel_loops = kernel_loops_try;
}
}
@ -327,27 +303,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
device_param->kernel_power = kernel_power;
#if defined (DEBUG)
user_options_t *user_options = hashcat_ctx->user_options;
if (user_options->quiet == false)
{
clear_prompt (hashcat_ctx);
printf
(
"- Device #%u: autotuned kernel-accel to %u" EOL
"- Device #%u: autotuned kernel-loops to %u" EOL,
device_param->device_id + 1, kernel_accel,
device_param->device_id + 1, kernel_loops
);
send_prompt (hashcat_ctx);
}
#endif
return 0;
}

@ -193,6 +193,12 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
EVENT (EVENT_AUTOTUNE_FINISHED);
/**
* find same opencl devices and equal results
*/
opencl_ctx_devices_sync_tuning (hashcat_ctx);
/**
* autotune modified kernel_accel, which modifies opencl_ctx->kernel_power_all
*/

@ -3553,6 +3553,62 @@ void opencl_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
opencl_ctx->need_sysfs = false;
}
static bool is_same_device_type (const hc_device_param_t *src, const hc_device_param_t *dst)
{
if (strcmp (src->device_name, dst->device_name) != 0) return false;
if (strcmp (src->device_vendor, dst->device_vendor) != 0) return false;
if (strcmp (src->device_version, dst->device_version) != 0) return false;
if (strcmp (src->driver_version, dst->driver_version) != 0) return false;
if (src->device_processors != dst->device_processors) return false;
if (src->device_maxclock_frequency != dst->device_maxclock_frequency) return false;
if (src->device_maxworkgroup_size != dst->device_maxworkgroup_size) return false;
// memory size can be different, depending on which gpu has a monitor connected
// if (src->device_maxmem_alloc != dst->device_maxmem_alloc) return false;
// if (src->device_global_mem != dst->device_global_mem) return false;
if (src->sm_major != dst->sm_major) return false;
if (src->sm_minor != dst->sm_minor) return false;
if (src->kernel_exec_timeout != dst->kernel_exec_timeout) return false;
return true;
}
void opencl_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
{
opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
status_ctx_t *status_ctx = hashcat_ctx->status_ctx;
user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
user_options_t *user_options = hashcat_ctx->user_options;
if (opencl_ctx->enabled == false) return;
for (u32 device_id_src = 0; device_id_src < opencl_ctx->devices_cnt; device_id_src++)
{
hc_device_param_t *device_param_src = &opencl_ctx->devices_param[device_id_src];
if (device_param_src->skipped == true) continue;
for (u32 device_id_dst = device_id_src; device_id_dst < opencl_ctx->devices_cnt; device_id_dst++)
{
hc_device_param_t *device_param_dst = &opencl_ctx->devices_param[device_id_dst];
if (device_param_dst->skipped == true) continue;
if (is_same_device_type (device_param_src, device_param_dst) == false) continue;
device_param_dst->kernel_accel = device_param_src->kernel_accel;
device_param_dst->kernel_loops = device_param_src->kernel_loops;
const u32 kernel_power = device_param_dst->device_processors * device_param_dst->kernel_threads_by_user * device_param_dst->kernel_accel;
device_param_dst->kernel_power = kernel_power;
}
}
}
void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
{
opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx;
@ -3568,6 +3624,8 @@ void opencl_ctx_devices_update_power (hashcat_ctx_t *hashcat_ctx)
{
hc_device_param_t *device_param = &opencl_ctx->devices_param[device_id];
if (device_param->skipped == true) continue;
kernel_power_all += device_param->kernel_power;
}

Loading…
Cancel
Save