hashcat/src/opencl.c


/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
#include "common.h"
#include "types_int.h"
#include "types.h"
#include "interface.h"
#include "convert.h"
#include "memory.h"
#include "logging.h"
#include "locking.h"
#include "ext_OpenCL.h"
#include "ext_ADL.h"
#include "ext_nvapi.h"
#include "ext_nvml.h"
#include "ext_xnvctrl.h"
#include "rp_cpu.h"
#include "timer.h"
#include "opencl.h"
#include "shared.h"
#include "hwmon.h"
#include "mpsp.h"
#include "status.h"
#include "stdout.h"
#include "restore.h"
#include "outfile.h"
#include "potfile.h"
#include "debugfile.h"
#include "loopback.h"
#include "thread.h"
#include "dictstat.h"
#include "wordlist.h"
#include "filehandling.h"
#include "hash_management.h"
#include "data.h"
extern hc_global_data_t data;
extern hc_thread_mutex_t mux_counter;
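
/**
 * Parse a comma-separated list of OpenCL platform numbers (1..32) into a
 * bitmask: platform n sets bit (n - 1). With no list given, all platforms
 * are enabled.
 */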
uint setup_opencl_platforms_filter (char *opencl_platforms)
{
  uint opencl_platforms_filter = 0;

  if (opencl_platforms)
  {
    char *platforms = mystrdup (opencl_platforms);

    char *next = strtok (platforms, ",");

    if (next == NULL)
    {
      // strtok () returns NULL for an empty list; bail out instead of dereferencing it

      log_error ("ERROR: Empty OpenCL platform list specified");

      exit (-1);
    }

    do
    {
      const int platform = atoi (next);

      if (platform < 1 || platform > 32)
      {
        log_error ("ERROR: Invalid OpenCL platform %d specified", platform);

        exit (-1);
      }

      opencl_platforms_filter |= 1u << (platform - 1);

    } while ((next = strtok (NULL, ",")) != NULL);

    myfree (platforms);
  }
  else
  {
    opencl_platforms_filter = -1u;
  }

  return opencl_platforms_filter;
}
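
/**
 * Parse a comma-separated list of device ids (1..32) into a bitmask, one
 * bit per device. With no list given, all devices are enabled.
 */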
u32 setup_devices_filter (char *opencl_devices)
{
  u32 devices_filter = 0;

  if (opencl_devices)
  {
    char *devices = mystrdup (opencl_devices);

    char *next = strtok (devices, ",");

    if (next == NULL)
    {
      // same empty-list guard as in setup_opencl_platforms_filter ()

      log_error ("ERROR: Empty device_id list specified");

      exit (-1);
    }

    do
    {
      const int device_id = atoi (next);

      if (device_id < 1 || device_id > 32)
      {
        log_error ("ERROR: Invalid device_id %d specified", device_id);

        exit (-1);
      }

      devices_filter |= 1u << (device_id - 1);

    } while ((next = strtok (NULL, ",")) != NULL);

    myfree (devices);
  }
  else
  {
    devices_filter = -1u;
  }

  return devices_filter;
}
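
/**
 * Parse a comma-separated list of device types (1 = CPU, 2 = GPU,
 * 3 = ACCELERATOR) into a CL_DEVICE_TYPE_* bitmask.
 */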
cl_device_type setup_device_types_filter (char *opencl_device_types)
{
  cl_device_type device_types_filter = 0;

  if (opencl_device_types)
  {
    char *device_types = mystrdup (opencl_device_types);

    char *next = strtok (device_types, ",");

    if (next == NULL)
    {
      // same empty-list guard as in setup_opencl_platforms_filter ()

      log_error ("ERROR: Empty device_type list specified");

      exit (-1);
    }

    do
    {
      const int device_type = atoi (next);

      if (device_type < 1 || device_type > 3)
      {
        log_error ("ERROR: Invalid device_type %d specified", device_type);

        exit (-1);
      }

      // shifting 1u by the type index yields the matching CL_DEVICE_TYPE_* bit

      device_types_filter |= 1u << device_type;

    } while ((next = strtok (NULL, ",")) != NULL);

    myfree (device_types);
  }
  else
  {
    // Do not use the CPU by default: a busy CPU slows down GPU
    // synchronization, which often reduces overall GPU performance

    device_types_filter = CL_DEVICE_TYPE_ALL & ~CL_DEVICE_TYPE_CPU;
  }

  return device_types_filter;
}
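
/**
 * Read a kernel source file into a single NUL-terminated buffer and hand
 * the same pointer and length to every device slot.
 */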
void load_kernel (const char *kernel_file, int num_devices, size_t *kernel_lengths, const u8 **kernel_sources)
{
  FILE *fp = fopen (kernel_file, "rb");

  if (fp != NULL)
  {
    struct stat st;

    memset (&st, 0, sizeof (st));

    stat (kernel_file, &st);

    u8 *buf = (u8 *) mymalloc (st.st_size + 1);

    size_t num_read = fread (buf, sizeof (u8), st.st_size, fp);

    if (num_read != (size_t) st.st_size)
    {
      log_error ("ERROR: %s: %s", kernel_file, strerror (errno));

      exit (-1);
    }

    fclose (fp);

    buf[st.st_size] = 0;

    for (int i = 0; i < num_devices; i++)
    {
      kernel_lengths[i] = (size_t) st.st_size;

      kernel_sources[i] = buf;
    }
  }
  else
  {
    log_error ("ERROR: %s: %s", kernel_file, strerror (errno));

    exit (-1);
  }
}
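
/**
 * Write a compiled program binary to dst under a file lock, so concurrent
 * writers cannot interleave their output.
 */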
void writeProgramBin (char *dst, u8 *binary, size_t binary_size)
{
  if (binary_size > 0)
  {
    FILE *fp = fopen (dst, "wb");

    if (fp == NULL)
    {
      // do not crash in lock_file () / fwrite () if the file cannot be created

      log_error ("ERROR: %s: %s", dst, strerror (errno));

      return;
    }

    lock_file (fp);

    fwrite (binary, sizeof (u8), binary_size, fp);

    fflush (fp);
    fclose (fp);
  }
}
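
/**
 * Read a single candidate password back from the device password buffer,
 * addressed by its global work-item id.
 */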
int gidd_to_pw_t (hc_device_param_t *device_param, const u64 gidd, pw_t *pw)
{
  cl_int CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, gidd * sizeof (pw_t), sizeof (pw_t), pw, 0, NULL, NULL);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  return 0;
}
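
/**
 * Dispatch the kernel sequence for one batch. Fast hashes
 * (ATTACK_EXEC_INSIDE_KERNEL) pick kernel 1/2/3 by the highest password
 * length; slow hashes run the amplifier, kernel 1 (init), kernel 2 (loop)
 * across salt_iter, and kernel 3 (comp), with optional hook kernels 12/23
 * in between.
 */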
int choose_kernel (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt, const uint fast_iteration)
{
  cl_int CL_err = CL_SUCCESS;

  if (hashconfig->hash_mode == 2000)
  {
    process_stdout (device_param, pws_cnt);

    return 0;
  }

  if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
  {
    if (attack_mode == ATTACK_MODE_BF)
    {
      if (opts_type & OPTS_TYPE_PT_BITSLICE)
      {
        const uint size_tm = 32 * sizeof (bs_word_t);

        run_kernel_bzero (device_param, device_param->d_tm_c, size_tm);

        run_kernel_tm (device_param);

        CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS)
        {
          log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err));

          return -1;
        }
      }
    }

    if (highest_pw_len < 16)
    {
      run_kernel (KERN_RUN_1, device_param, pws_cnt, true, fast_iteration, hashconfig);
    }
    else if (highest_pw_len < 32)
    {
      run_kernel (KERN_RUN_2, device_param, pws_cnt, true, fast_iteration, hashconfig);
    }
    else
    {
      run_kernel (KERN_RUN_3, device_param, pws_cnt, true, fast_iteration, hashconfig);
    }
  }
  else
  {
    run_kernel_amp (device_param, pws_cnt);

    run_kernel (KERN_RUN_1, device_param, pws_cnt, false, 0, hashconfig);

    if (opts_type & OPTS_TYPE_HOOK12)
    {
      run_kernel (KERN_RUN_12, device_param, pws_cnt, false, 0, hashconfig);

      CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS)
      {
        log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err));

        return -1;
      }

      // hook point: host-side processing of hooks_buf happens here,
      // before the buffer is written back to the device

      CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS)
      {
        log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err));

        return -1;
      }
    }

    uint iter = salt_buf->salt_iter;

    uint loop_step = device_param->kernel_loops;

    for (uint loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
    {
      uint loop_left = iter - loop_pos;

      loop_left = MIN (loop_left, loop_step);

      device_param->kernel_params_buf32[28] = loop_pos;
      device_param->kernel_params_buf32[29] = loop_left;

      run_kernel (KERN_RUN_2, device_param, pws_cnt, true, slow_iteration, hashconfig);

      if (data.devices_status == STATUS_CRACKED) break;
      if (data.devices_status == STATUS_ABORTED) break;
      if (data.devices_status == STATUS_QUIT)    break;
      if (data.devices_status == STATUS_BYPASS)  break;

      /**
       * speed
       */

      const float iter_part = (float) (loop_pos + loop_left) / iter;

      const u64 perf_sum_all = (u64) (pws_cnt * iter_part);

      double speed_ms;

      hc_timer_get (device_param->timer_speed, speed_ms);

      const u32 speed_pos = device_param->speed_pos;

      device_param->speed_cnt[speed_pos] = perf_sum_all;
      device_param->speed_ms[speed_pos]  = speed_ms;

      if (data.benchmark == 1)
      {
        if (speed_ms > 4096) data.devices_status = STATUS_ABORTED;
      }
    }

    if (opts_type & OPTS_TYPE_HOOK23)
    {
      run_kernel (KERN_RUN_23, device_param, pws_cnt, false, 0, hashconfig);

      CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS)
      {
        log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err));

        return -1;
      }

      // hook point: host-side processing of hooks_buf happens here,
      // before the buffer is written back to the device

      CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS)
      {
        log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err));

        return -1;
      }
    }

    run_kernel (KERN_RUN_3, device_param, pws_cnt, false, 0, hashconfig);
  }

  return 0;
}
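
/**
 * Enqueue one main kernel: set the scalar arguments, pad the global work
 * size to a multiple of the thread count, then profile the run through an
 * OpenCL event. The measured times feed the speed display and the NVIDIA
 * spin-damping sleep applied before waiting on the event.
 */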
int run_kernel (const uint kern_run, hc_device_param_t *device_param, const uint num, const uint event_update, const uint iteration, hashconfig_t *hashconfig)
{
  cl_int CL_err = CL_SUCCESS;

  uint num_elements = num;

  device_param->kernel_params_buf32[33] = data.combs_mode;
  device_param->kernel_params_buf32[34] = num;

  uint kernel_threads = device_param->kernel_threads;

  while (num_elements % kernel_threads) num_elements++;

  cl_kernel kernel = NULL;

  switch (kern_run)
  {
    case KERN_RUN_1:  kernel = device_param->kernel1;  break;
    case KERN_RUN_12: kernel = device_param->kernel12; break;
    case KERN_RUN_2:  kernel = device_param->kernel2;  break;
    case KERN_RUN_23: kernel = device_param->kernel23; break;
    case KERN_RUN_3:  kernel = device_param->kernel3;  break;
  }

  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 24, sizeof (cl_uint), device_param->kernel_params[24]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 25, sizeof (cl_uint), device_param->kernel_params[25]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 26, sizeof (cl_uint), device_param->kernel_params[26]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 27, sizeof (cl_uint), device_param->kernel_params[27]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 28, sizeof (cl_uint), device_param->kernel_params[28]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 29, sizeof (cl_uint), device_param->kernel_params[29]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 30, sizeof (cl_uint), device_param->kernel_params[30]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 31, sizeof (cl_uint), device_param->kernel_params[31]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 32, sizeof (cl_uint), device_param->kernel_params[32]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 33, sizeof (cl_uint), device_param->kernel_params[33]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 34, sizeof (cl_uint), device_param->kernel_params[34]);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  cl_event event;

  if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (data.attack_mode == ATTACK_MODE_BF))
  {
    const size_t global_work_size[3] = { num_elements,        32, 1 };
    const size_t local_work_size[3]  = { kernel_threads / 32, 32, 1 };

    CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }
  }
  else
  {
    if (kern_run == KERN_RUN_2)
    {
      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD)
      {
        num_elements = CEIL (num_elements / device_param->vector_width);
      }
    }

    while (num_elements % kernel_threads) num_elements++;

    const size_t global_work_size[3] = { num_elements,   1, 1 };
    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };

    CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }
  }

  CL_err = hc_clFlush (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  if (device_param->nvidia_spin_damp > 0)
  {
    if (data.devices_status == STATUS_RUNNING)
    {
      if (iteration < EXPECTED_ITERATIONS)
      {
        switch (kern_run)
        {
          case KERN_RUN_1: if (device_param->exec_us_prev1[iteration] > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iteration] * device_param->nvidia_spin_damp)); break;
          case KERN_RUN_2: if (device_param->exec_us_prev2[iteration] > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iteration] * device_param->nvidia_spin_damp)); break;
          case KERN_RUN_3: if (device_param->exec_us_prev3[iteration] > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iteration] * device_param->nvidia_spin_damp)); break;
        }
      }
    }
  }

  CL_err = hc_clWaitForEvents (data.ocl, 1, &event);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clWaitForEvents(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  cl_ulong time_start;
  cl_ulong time_end;

  CL_err |= hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL);
  CL_err |= hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END,   sizeof (time_end),   &time_end,   NULL);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clGetEventProfilingInfo(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  const double exec_us = (double) (time_end - time_start) / 1000;

  if (data.devices_status == STATUS_RUNNING)
  {
    if (iteration < EXPECTED_ITERATIONS)
    {
      switch (kern_run)
      {
        case KERN_RUN_1: device_param->exec_us_prev1[iteration] = exec_us; break;
        case KERN_RUN_2: device_param->exec_us_prev2[iteration] = exec_us; break;
        case KERN_RUN_3: device_param->exec_us_prev3[iteration] = exec_us; break;
      }
    }
  }

  if (event_update)
  {
    uint exec_pos = device_param->exec_pos;

    device_param->exec_ms[exec_pos] = exec_us / 1000;

    exec_pos++;

    if (exec_pos == EXEC_CACHE)
    {
      exec_pos = 0;
    }

    device_param->exec_pos = exec_pos;
  }

  CL_err = hc_clReleaseEvent (data.ocl, event);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clReleaseEvent(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  CL_err = hc_clFinish (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  return 0;
}
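
/**
 * Run one of the mask-processor kernels (MP, MP_R, MP_L), which expand a
 * mask chunk into candidate words directly on the device.
 */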
int run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num)
{
  cl_int CL_err = CL_SUCCESS;

  uint num_elements = num;

  switch (kern_run)
  {
    case KERN_RUN_MP:   device_param->kernel_params_mp_buf32[8]   = num; break;
    case KERN_RUN_MP_R: device_param->kernel_params_mp_r_buf32[8] = num; break;
    case KERN_RUN_MP_L: device_param->kernel_params_mp_l_buf32[9] = num; break;
  }

  // causes problems with special threads like in bcrypt
  // const uint kernel_threads = device_param->kernel_threads;

  uint kernel_threads = device_param->kernel_threads;

  while (num_elements % kernel_threads) num_elements++;

  cl_kernel kernel = NULL;

  switch (kern_run)
  {
    case KERN_RUN_MP:   kernel = device_param->kernel_mp;   break;
    case KERN_RUN_MP_R: kernel = device_param->kernel_mp_r; break;
    case KERN_RUN_MP_L: kernel = device_param->kernel_mp_l; break;
  }

  switch (kern_run)
  {
    case KERN_RUN_MP:   CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp[4]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp[5]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp[6]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp[7]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint),  device_param->kernel_params_mp[8]);
                        break;
    case KERN_RUN_MP_R: CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp_r[4]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp_r[5]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp_r[6]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp_r[7]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint),  device_param->kernel_params_mp_r[8]);
                        break;
    case KERN_RUN_MP_L: CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_l[3]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint),  device_param->kernel_params_mp_l[4]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint),  device_param->kernel_params_mp_l[5]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint),  device_param->kernel_params_mp_l[6]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint),  device_param->kernel_params_mp_l[7]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint),  device_param->kernel_params_mp_l[8]);
                        CL_err |= hc_clSetKernelArg (data.ocl, kernel, 9, sizeof (cl_uint),  device_param->kernel_params_mp_l[9]);
                        break;
  }

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  const size_t global_work_size[3] = { num_elements,   1, 1 };
  const size_t local_work_size[3]  = { kernel_threads, 1, 1 };

  CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  CL_err = hc_clFlush (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  CL_err = hc_clFinish (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  return 0;
}
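
/**
 * Run the tm kernel used by bitsliced kernels (see choose_kernel); the
 * work size is fixed at 1024 elements with 32 threads per group.
 */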
int run_kernel_tm (hc_device_param_t *device_param)
{
  cl_int CL_err = CL_SUCCESS;

  const uint num_elements = 1024; // fixed

  uint kernel_threads = 32;

  cl_kernel kernel = device_param->kernel_tm;

  const size_t global_work_size[3] = { num_elements,   1, 1 };
  const size_t local_work_size[3]  = { kernel_threads, 1, 1 };

  CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  CL_err = hc_clFlush (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  CL_err = hc_clFinish (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  return 0;
}
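
/**
 * Run the amplifier kernel that applies rules, combinator words or mask
 * words to the base candidates before a slow-hash init kernel.
 */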
int run_kernel_amp (hc_device_param_t *device_param, const uint num)
{
  cl_int CL_err = CL_SUCCESS;

  uint num_elements = num;

  device_param->kernel_params_amp_buf32[5] = data.combs_mode;
  device_param->kernel_params_amp_buf32[6] = num_elements;

  // causes problems with special threads like in bcrypt
  // const uint kernel_threads = device_param->kernel_threads;

  uint kernel_threads = device_param->kernel_threads;

  while (num_elements % kernel_threads) num_elements++;

  cl_kernel kernel = device_param->kernel_amp;

  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]);
  CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_amp[6]);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  const size_t global_work_size[3] = { num_elements,   1, 1 };
  const size_t local_work_size[3]  = { kernel_threads, 1, 1 };

  CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  CL_err = hc_clFlush (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  CL_err = hc_clFinish (data.ocl, device_param->command_queue);

  if (CL_err != CL_SUCCESS)
  {
    log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err));

    return -1;
  }

  return 0;
}
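
/**
 * Fill a device buffer with a 32-bit value: a kernel covers whole 16-byte
 * blocks, and a small clEnqueueWriteBuffer covers the remaining tail.
 */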
int run_kernel_memset (hc_device_param_t *device_param, cl_mem buf, const uint value, const uint num)
{
  cl_int CL_err = CL_SUCCESS;

  const u32 num16d = num / 16;
  const u32 num16m = num % 16;

  if (num16d)
  {
    device_param->kernel_params_memset_buf32[1] = value;
    device_param->kernel_params_memset_buf32[2] = num16d;

    uint kernel_threads = device_param->kernel_threads;

    uint num_elements = num16d;

    while (num_elements % kernel_threads) num_elements++;

    cl_kernel kernel = device_param->kernel_memset;

    CL_err |= hc_clSetKernelArg (data.ocl, kernel, 0, sizeof (cl_mem),  (void *) &buf);
    CL_err |= hc_clSetKernelArg (data.ocl, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]);
    CL_err |= hc_clSetKernelArg (data.ocl, kernel, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }

    const size_t global_work_size[3] = { num_elements,   1, 1 };
    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };

    CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }

    CL_err = hc_clFlush (data.ocl, device_param->command_queue);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }

    CL_err = hc_clFinish (data.ocl, device_param->command_queue);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }
  }

  if (num16m)
  {
    u32 tmp[4];

    tmp[0] = value;
    tmp[1] = value;
    tmp[2] = value;
    tmp[3] = value;

    CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }
  }

  return 0;
}
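
/**
 * Zero a device buffer; thin wrapper around run_kernel_memset ().
 */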
int run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size)
{
  return run_kernel_memset (device_param, buf, 0, size);
}
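
/**
 * Push the next batch of base words to the device: direct buffer writes
 * for straight/combi (optionally appending the 0x01/0x80 padding marker),
 * or the MP_L kernel for brute-force, which generates the left-hand mask
 * side on the device.
 */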
int run_copy (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint pws_cnt)
{
  cl_int CL_err = CL_SUCCESS;

  if (data.attack_kern == ATTACK_KERN_STRAIGHT)
  {
    CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }
  }
  else if (data.attack_kern == ATTACK_KERN_COMBI)
  {
    if (data.attack_mode == ATTACK_MODE_COMBI)
    {
      if (data.combs_mode == COMBINATOR_MODE_BASE_RIGHT)
      {
        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
        {
          for (u32 i = 0; i < pws_cnt; i++)
          {
            const u32 pw_len = device_param->pws_buf[i].pw_len;

            u8 *ptr = (u8 *) device_param->pws_buf[i].i;

            ptr[pw_len] = 0x01;
          }
        }
        else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
        {
          for (u32 i = 0; i < pws_cnt; i++)
          {
            const u32 pw_len = device_param->pws_buf[i].pw_len;

            u8 *ptr = (u8 *) device_param->pws_buf[i].i;

            ptr[pw_len] = 0x80;
          }
        }
      }
    }
    else if (data.attack_mode == ATTACK_MODE_HYBRID2)
    {
      if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
      {
        for (u32 i = 0; i < pws_cnt; i++)
        {
          const u32 pw_len = device_param->pws_buf[i].pw_len;

          u8 *ptr = (u8 *) device_param->pws_buf[i].i;

          ptr[pw_len] = 0x01;
        }
      }
      else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
      {
        for (u32 i = 0; i < pws_cnt; i++)
        {
          const u32 pw_len = device_param->pws_buf[i].pw_len;

          u8 *ptr = (u8 *) device_param->pws_buf[i].i;

          ptr[pw_len] = 0x80;
        }
      }
    }

    CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS)
    {
      log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err));

      return -1;
    }
  }
  else if (data.attack_kern == ATTACK_KERN_BF)
  {
    const u64 off = device_param->words_off;

    device_param->kernel_params_mp_l_buf64[3] = off;

    run_kernel_mp (KERN_RUN_MP_L, device_param, pws_cnt);
  }

  return 0;
}
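
/**
 * Per-device cracking loop: for every salt, walk the inner loop over the
 * amplifiers (rules, combinator words or mask chunks), stage them on the
 * device, run the kernels via choose_kernel (), then update progress and
 * speed counters.
 */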
int run_cracker (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint pws_cnt)
{
  char *line_buf = (char *) mymalloc (HCBUFSIZ_LARGE);

  // init speed timer

  uint speed_pos = device_param->speed_pos;

#if defined (_POSIX)
  if (device_param->timer_speed.tv_sec == 0)
  {
    hc_timer_set (&device_param->timer_speed);
  }
#endif

#if defined (_WIN)
  if (device_param->timer_speed.QuadPart == 0)
  {
    hc_timer_set (&device_param->timer_speed);
  }
#endif

  // find the highest password length; choose_kernel () uses it to pick
  // the cheapest kernel variant

  uint highest_pw_len = 0;

  if (data.attack_kern == ATTACK_KERN_STRAIGHT)
  {
    // candidate lengths come from the wordlist; keep 0
  }
  else if (data.attack_kern == ATTACK_KERN_COMBI)
  {
    // candidate lengths come from the wordlists; keep 0
  }
  else if (data.attack_kern == ATTACK_KERN_BF)
  {
    highest_pw_len = device_param->kernel_params_mp_l_buf32[4]
                   + device_param->kernel_params_mp_l_buf32[5];
  }

  // iteration type

  uint innerloop_step = 0;
  uint innerloop_cnt  = 0;

  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) innerloop_step = device_param->kernel_loops;
  else                                                      innerloop_step = 1;

  if      (data.attack_kern == ATTACK_KERN_STRAIGHT) innerloop_cnt = data.kernel_rules_cnt;
  else if (data.attack_kern == ATTACK_KERN_COMBI)    innerloop_cnt = data.combs_cnt;
  else if (data.attack_kern == ATTACK_KERN_BF)       innerloop_cnt = data.bfs_cnt;

  // loop start: the outermost loop iterates over the salts, the inner loop
  // over the amplifiers (if there are multiple)

  for (uint salt_pos = 0; salt_pos < data.salts_cnt; salt_pos++)
  {
    while (data.devices_status == STATUS_PAUSED) hc_sleep (1);

    if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint ();

    if (data.devices_status == STATUS_CRACKED) break;
    if (data.devices_status == STATUS_ABORTED) break;
    if (data.devices_status == STATUS_QUIT)    break;
    if (data.devices_status == STATUS_BYPASS)  break;

    salt_t *salt_buf = &data.salts_buf[salt_pos];

    device_param->kernel_params_buf32[27] = salt_pos;
    device_param->kernel_params_buf32[31] = salt_buf->digests_cnt;
    device_param->kernel_params_buf32[32] = salt_buf->digests_offset;

    FILE *combs_fp = device_param->combs_fp;

    if (data.attack_mode == ATTACK_MODE_COMBI)
    {
      rewind (combs_fp);
    }

    // innerloops

    for (uint innerloop_pos = 0; innerloop_pos < innerloop_cnt; innerloop_pos += innerloop_step)
    {
      while (data.devices_status == STATUS_PAUSED) hc_sleep (1);

      if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint ();

      if (data.devices_status == STATUS_CRACKED) break;
      if (data.devices_status == STATUS_ABORTED) break;
      if (data.devices_status == STATUS_QUIT)    break;
      if (data.devices_status == STATUS_BYPASS)  break;

      uint fast_iteration = 0;

      uint innerloop_left = innerloop_cnt - innerloop_pos;

      if (innerloop_left > innerloop_step)
      {
        innerloop_left = innerloop_step;

        fast_iteration = 1;
      }

      device_param->innerloop_pos  = innerloop_pos;
      device_param->innerloop_left = innerloop_left;

      device_param->kernel_params_buf32[30] = innerloop_left;

      // this should be unreachable given the loop condition; kept as a guard

      if (innerloop_left == 0)
      {
        puts ("bug, how should this happen????\n");

        continue;
      }

      if (data.salts_shown[salt_pos] == 1)
      {
        data.words_progress_done[salt_pos] += (u64) pws_cnt * (u64) innerloop_left;

        continue;
      }

      // initialize amplifiers

      if (data.attack_mode == ATTACK_MODE_COMBI)
      {
        uint i = 0;

        while (i < innerloop_left)
        {
          if (feof (combs_fp)) break;

          int line_len = fgetl (combs_fp, line_buf);

          if (line_len >= PW_MAX1) continue;

          line_len = convert_from_hex (line_buf, line_len);

          char *line_buf_new = line_buf;

          if (run_rule_engine (data.rule_len_r, data.rule_buf_r))
          {
            char rule_buf_out[BLOCK_SIZE] = { 0 };

            int rule_len_out = _old_apply_rule (data.rule_buf_r, data.rule_len_r, line_buf, line_len, rule_buf_out);

            if (rule_len_out < 0)
            {
              data.words_progress_rejected[salt_pos] += pws_cnt;

              continue;
            }

            line_len = rule_len_out;

            line_buf_new = rule_buf_out;
          }

          line_len = MIN (line_len, PW_DICTMAX);

          u8 *ptr = (u8 *) device_param->combs_buf[i].i;

          memcpy (ptr, line_buf_new, line_len);

          memset (ptr + line_len, 0, PW_DICTMAX1 - line_len);

          if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
          {
            uppercase (ptr, line_len);
          }

          if (data.combs_mode == COMBINATOR_MODE_BASE_LEFT)
          {
            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
            {
              ptr[line_len] = 0x80;
            }

            if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
            {
              ptr[line_len] = 0x01;
            }
          }

          device_param->combs_buf[i].pw_len = line_len;

          i++;
        }

        for (uint j = i; j < innerloop_left; j++)
        {
          device_param->combs_buf[j].i[0] = 0;
          device_param->combs_buf[j].i[1] = 0;
          device_param->combs_buf[j].i[2] = 0;
          device_param->combs_buf[j].i[3] = 0;
          device_param->combs_buf[j].i[4] = 0;
          device_param->combs_buf[j].i[5] = 0;
          device_param->combs_buf[j].i[6] = 0;
          device_param->combs_buf[j].i[7] = 0;

          device_param->combs_buf[j].pw_len = 0;
        }

        innerloop_left = i;
      }
      else if (data.attack_mode == ATTACK_MODE_BF)
      {
        u64 off = innerloop_pos;

        device_param->kernel_params_mp_r_buf64[3] = off;

        run_kernel_mp (KERN_RUN_MP_R, device_param, innerloop_left);
      }
      else if (data.attack_mode == ATTACK_MODE_HYBRID1)
      {
        u64 off = innerloop_pos;

        device_param->kernel_params_mp_buf64[3] = off;

        run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left);
      }
      else if (data.attack_mode == ATTACK_MODE_HYBRID2)
      {
        u64 off = innerloop_pos;

        device_param->kernel_params_mp_buf64[3] = off;

        run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left);
      }

      // copy amplifiers

      if (data.attack_mode == ATTACK_MODE_STRAIGHT)
      {
        cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL);

        if (CL_err != CL_SUCCESS)
        {
          log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err));

          return -1;
        }
      }
      else if (data.attack_mode == ATTACK_MODE_COMBI)
      {
        cl_int CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (comb_t), device_param->combs_buf, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS)
        {
          log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err));

          return -1;
        }
      }
      else if (data.attack_mode == ATTACK_MODE_BF)
      {
        cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_bfs, device_param->d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL);

        if (CL_err != CL_SUCCESS)
        {
          log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err));

          return -1;
        }
      }
      else if (data.attack_mode == ATTACK_MODE_HYBRID1)
      {
        cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL);

        if (CL_err != CL_SUCCESS)
        {
          log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err));

          return -1;
        }
      }
      else if (data.attack_mode == ATTACK_MODE_HYBRID2)
      {
        cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL);

        if (CL_err != CL_SUCCESS)
        {
          log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err));

          return -1;
        }
      }

      if (data.benchmark == 1)
      {
        hc_timer_set (&device_param->timer_speed);
      }

      int rc = choose_kernel (device_param, hashconfig, hashconfig->attack_exec, data.attack_mode, hashconfig->opts_type, salt_buf, highest_pw_len, pws_cnt, fast_iteration);

      if (rc == -1) return -1;

      if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint ();

      if (data.devices_status == STATUS_CRACKED) break;
      if (data.devices_status == STATUS_ABORTED) break;
      if (data.devices_status == STATUS_QUIT)    break;
      if (data.devices_status == STATUS_BYPASS)  break;

      /**
       * result
       */

      if (data.benchmark == 0)
      {
        check_cracked (device_param, salt_pos, hashconfig);
      }

      /**
       * progress
       */

      u64 perf_sum_all = (u64) pws_cnt * (u64) innerloop_left;

      hc_thread_mutex_lock (mux_counter);

      data.words_progress_done[salt_pos] += perf_sum_all;

      hc_thread_mutex_unlock (mux_counter);

      /**
       * speed
       */

      double speed_ms;

      hc_timer_get (device_param->timer_speed, speed_ms);

      hc_timer_set (&device_param->timer_speed);

      // current speed

      //hc_thread_mutex_lock (mux_display);

      device_param->speed_cnt[speed_pos] = perf_sum_all;
      device_param->speed_ms[speed_pos]  = speed_ms;

      //hc_thread_mutex_unlock (mux_display);

      speed_pos++;

      if (speed_pos == SPEED_CACHE)
      {
        speed_pos = 0;
      }

      /**
       * benchmark
       */

      if (data.benchmark == 1) break;
    }
  }

  device_param->speed_pos = speed_pos;

  myfree (line_buf);

  return 0;
}