diff --git a/include/hash_management.h b/include/hash_management.h index 42d16fa08..30d958662 100644 --- a/include/hash_management.h +++ b/include/hash_management.h @@ -6,4 +6,12 @@ #ifndef _HASH_MANAGEMENT_H #define _HASH_MANAGEMENT_H +#define USERNAME 0 + +void save_hash (); + +void check_hash (hc_device_param_t *device_param, plain_t *plain); + +int check_cracked (hc_device_param_t *device_param, const uint salt_pos, hashconfig_t *hashconfig); + #endif // _HASH_MANAGEMENT_H diff --git a/include/interface.h b/include/interface.h index a6fe6ae7e..fe70274a7 100644 --- a/include/interface.h +++ b/include/interface.h @@ -1658,6 +1658,7 @@ int opencart_parse_hash (char *input_buf, uint input_len, hash_t *hash * output functions */ +char *stroptitype (const uint opti_type); char *strhashtype (const uint hash_mode); char *strparser (const uint parser_status); diff --git a/include/kernel.h b/include/kernel.h deleted file mode 100644 index f0695cd0d..000000000 --- a/include/kernel.h +++ /dev/null @@ -1,9 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#ifndef _KERNEL_H -#define _KERNEL_H - -#endif // _KERNEL_H diff --git a/include/opencl.h b/include/opencl.h index 988c1951f..d64408a9f 100644 --- a/include/opencl.h +++ b/include/opencl.h @@ -11,6 +11,18 @@ #define PARAMCNT 64 + +#define KERNEL_ACCEL 0 +#define KERNEL_LOOPS 0 +#define KERNEL_RULES 1024 +#define KERNEL_COMBS 1024 +#define KERNEL_BFS 1024 +#define KERNEL_THREADS_MAX 256 +#define KERNEL_THREADS_MAX_CPU 1 +#define WORKLOAD_PROFILE 2 +#define SCRYPT_TMTO 0 +#define NVIDIA_SPIN_DAMP 100 + static const char CL_VENDOR_AMD[] = "Advanced Micro Devices, Inc."; static const char CL_VENDOR_AMD_USE_INTEL[] = "GenuineIntel"; static const char CL_VENDOR_APPLE[] = "Apple"; @@ -222,6 +234,16 @@ cl_device_type setup_device_types_filter (char *opencl_device_types); void load_kernel (const char *kernel_file, int num_devices, size_t *kernel_lengths, const u8 **kernel_sources); void 
writeProgramBin (char *dst, u8 *binary, size_t binary_size); -double get_avg_exec_time (hc_device_param_t *device_param, const int last_num_entries); - int gidd_to_pw_t (hc_device_param_t *device_param, const u64 gidd, pw_t *pw); + +int choose_kernel (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt, const uint fast_iteration); +int run_kernel (const uint kern_run, hc_device_param_t *device_param, const uint num, const uint event_update, const uint iteration, hashconfig_t *hashconfig); +int run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num); +int run_kernel_tm (hc_device_param_t *device_param); +int run_kernel_amp (hc_device_param_t *device_param, const uint num); +int run_kernel_memset (hc_device_param_t *device_param, cl_mem buf, const uint value, const uint num); +int run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size); + +int run_copy (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint pws_cnt); + +int run_cracker (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint pws_cnt); diff --git a/include/restore.h b/include/restore.h index def040b4f..3bd6e939a 100644 --- a/include/restore.h +++ b/include/restore.h @@ -57,4 +57,8 @@ void write_restore (const char *new_restore_file, restore_data_t *rd); void cycle_restore (); +void check_checkpoint (); + +void stop_at_checkpoint (); + #endif // _RESTORE_H diff --git a/include/status.h b/include/status.h index 94a4fd9dc..106081944 100644 --- a/include/status.h +++ b/include/status.h @@ -29,6 +29,8 @@ typedef enum status_rc } status_rc_t; +double get_avg_exec_time (hc_device_param_t *device_param, const int last_num_entries); + void status_display_machine_readable (); void status_display (); void status_benchmark_automate (); diff --git a/include/weak_hash.h 
b/include/weak_hash.h index 6307febe7..c00ef1559 100644 --- a/include/weak_hash.h +++ b/include/weak_hash.h @@ -6,4 +6,8 @@ #ifndef _WEAK_HASH_H #define _WEAK_HASH_H +#define WEAK_HASH_THRESHOLD 100 + +void weak_hash_check (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint salt_pos); + #endif // _WEAK_HASH_H diff --git a/src/Makefile b/src/Makefile index ca5e054da..b7d18989f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -163,7 +163,7 @@ LFLAGS_CROSS_WIN := -lpsapi ## Objects ## -OBJS_ALL := affinity attack_mode autotune benchmark bitmap bitops common convert cpu_aes cpu_crc32 cpu_des cpu_md5 cpu_sha1 cpu_sha256 data debugfile dictstat dynloader ext_ADL ext_nvapi ext_nvml ext_OpenCL ext_xnvctrl filehandling filenames folder hash_management hlfmt hwmon induct interface kernel locking logfile logging loopback memory mpsp opencl outfile_check outfile potfile powertune remove restore rp_cpu rp_kernel_on_cpu runtime shared status stdout terminal thread timer tuningdb usage version weak_hash wordlist +OBJS_ALL := affinity attack_mode autotune benchmark bitmap bitops common convert cpu_aes cpu_crc32 cpu_des cpu_md5 cpu_sha1 cpu_sha256 data debugfile dictstat dynloader ext_ADL ext_nvapi ext_nvml ext_OpenCL ext_xnvctrl filehandling filenames folder hash_management hlfmt hwmon induct interface locking logfile logging loopback memory mpsp opencl outfile_check outfile potfile powertune remove restore rp_cpu rp_kernel_on_cpu runtime shared status stdout terminal thread timer tuningdb usage version weak_hash wordlist NATIVE_OBJS := $(foreach OBJ,$(OBJS_ALL),obj/$(OBJ).NATIVE.o) diff --git a/src/data.c b/src/data.c index 135474447..8c6118651 100644 --- a/src/data.c +++ b/src/data.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "ext_OpenCL.h" #include "ext_ADL.h" @@ -13,7 +14,6 @@ #include "ext_nvml.h" #include "ext_xnvctrl.h" #include "hwmon.h" -#include "interface.h" #include 
"mpsp.h" #include "rp_cpu.h" #include "restore.h" diff --git a/src/hash_management.c b/src/hash_management.c index dffd51046..6133518b3 100644 --- a/src/hash_management.c +++ b/src/hash_management.c @@ -4,4 +4,547 @@ */ #include "common.h" +#include "types_int.h" +#include "types.h" +#include "interface.h" +#include "timer.h" +#include "memory.h" +#include "logging.h" +#include "ext_OpenCL.h" +#include "ext_ADL.h" +#include "ext_nvapi.h" +#include "ext_nvml.h" +#include "ext_xnvctrl.h" +#include "hwmon.h" +#include "mpsp.h" +#include "rp_cpu.h" +#include "restore.h" +#include "opencl.h" +#include "outfile.h" +#include "potfile.h" +#include "debugfile.h" +#include "loopback.h" +#include "data.h" +#include "terminal.h" +#include "status.h" +#include "thread.h" +#include "rp_kernel_on_cpu.h" #include "hash_management.h" + +extern hc_global_data_t data; + +extern hc_thread_mutex_t mux_display; + +void save_hash () +{ + hashconfig_t *hashconfig = data.hashconfig; + void *digests_buf = data.digests_buf; + salt_t *salts_buf = data.salts_buf; + void *esalts_buf = data.esalts_buf; + hashinfo_t **hash_info = data.hash_info; + char *hashfile = data.hashfile; + + char new_hashfile[256] = { 0 }; + char old_hashfile[256] = { 0 }; + + snprintf (new_hashfile, 255, "%s.new", hashfile); + snprintf (old_hashfile, 255, "%s.old", hashfile); + + unlink (new_hashfile); + + char separator = hashconfig->separator; + + FILE *fp = fopen (new_hashfile, "wb"); + + if (fp == NULL) + { + log_error ("ERROR: %s: %s", new_hashfile, strerror (errno)); + + exit (-1); + } + + for (uint salt_pos = 0; salt_pos < data.salts_cnt; salt_pos++) + { + if (data.salts_shown[salt_pos] == 1) continue; + + salt_t *salt_buf = &data.salts_buf[salt_pos]; + + for (uint digest_pos = 0; digest_pos < salt_buf->digests_cnt; digest_pos++) + { + uint idx = salt_buf->digests_offset + digest_pos; + + if (data.digests_shown[idx] == 1) continue; + + if (hashconfig->hash_mode != 2500) + { + if (data.username == 1) + { + user_t 
*user = data.hash_info[idx]->user; + + uint i; + + for (i = 0; i < user->user_len; i++) fputc (user->user_name[i], fp); + + fputc (separator, fp); + } + + char out_buf[HCBUFSIZ_LARGE]; // scratch buffer + + out_buf[0] = 0; + + ascii_digest (out_buf, salt_pos, digest_pos, hashconfig, digests_buf, salts_buf, esalts_buf, hash_info, hashfile); + + fputs (out_buf, fp); + + fputc ('\n', fp); + } + else + { + hccap_t hccap; + + to_hccap_t (&hccap, salt_pos, digest_pos, hashconfig, digests_buf, salts_buf, esalts_buf); + + fwrite (&hccap, sizeof (hccap_t), 1, fp); + } + } + } + + fflush (fp); + + fclose (fp); + + unlink (old_hashfile); + + if (rename (hashfile, old_hashfile) != 0) + { + log_error ("ERROR: Rename file '%s' to '%s': %s", hashfile, old_hashfile, strerror (errno)); + + exit (-1); + } + + unlink (hashfile); + + if (rename (new_hashfile, hashfile) != 0) + { + log_error ("ERROR: Rename file '%s' to '%s': %s", new_hashfile, hashfile, strerror (errno)); + + exit (-1); + } + + unlink (old_hashfile); +} + +void check_hash (hc_device_param_t *device_param, plain_t *plain) +{ + debugfile_ctx_t *debugfile_ctx = data.debugfile_ctx; + loopback_ctx_t *loopback_ctx = data.loopback_ctx; + outfile_ctx_t *outfile_ctx = data.outfile_ctx; + potfile_ctx_t *potfile_ctx = data.potfile_ctx; + + uint quiet = data.quiet; + + // debugfile + + u8 debug_rule_buf[BLOCK_SIZE] = { 0 }; + u32 debug_rule_len = 0; // -1 error + + u8 debug_plain_ptr[BLOCK_SIZE] = { 0 }; + u32 debug_plain_len = 0; + + // hash + + char out_buf[HCBUFSIZ_LARGE] = { 0 }; + + const u32 salt_pos = plain->salt_pos; + const u32 digest_pos = plain->digest_pos; // relative + const u32 gidvid = plain->gidvid; + const u32 il_pos = plain->il_pos; + + hashconfig_t *hashconfig = data.hashconfig; + void *digests_buf = data.digests_buf; + salt_t *salts_buf = data.salts_buf; + void *esalts_buf = data.esalts_buf; + hashinfo_t **hash_info = data.hash_info; + char *hashfile = data.hashfile; + + ascii_digest (out_buf, salt_pos, 
digest_pos, hashconfig, digests_buf, salts_buf, esalts_buf, hash_info, hashfile); + + // plain + + u64 crackpos = device_param->words_off; + + uint plain_buf[16] = { 0 }; + + u8 *plain_ptr = (u8 *) plain_buf; + + unsigned int plain_len = 0; + + if (data.attack_mode == ATTACK_MODE_STRAIGHT) + { + pw_t pw; + + gidd_to_pw_t (device_param, gidvid, &pw); + + for (int i = 0; i < 16; i++) + { + plain_buf[i] = pw.i[i]; + } + + plain_len = pw.pw_len; + + const uint off = device_param->innerloop_pos + il_pos; + + const uint debug_mode = debugfile_ctx->mode; + + if (debug_mode > 0) + { + debug_rule_len = 0; + + // save rule + if ((debug_mode == 1) || (debug_mode == 3) || (debug_mode == 4)) + { + memset (debug_rule_buf, 0, sizeof (debug_rule_buf)); + + debug_rule_len = kernel_rule_to_cpu_rule ((char *) debug_rule_buf, &data.kernel_rules_buf[off]); + } + + // save plain + if ((debug_mode == 2) || (debug_mode == 3) || (debug_mode == 4)) + { + memset (debug_plain_ptr, 0, sizeof (debug_plain_ptr)); + + memcpy (debug_plain_ptr, plain_ptr, plain_len); + + debug_plain_len = plain_len; + } + } + + plain_len = apply_rules (data.kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], plain_len); + + crackpos += gidvid; + crackpos *= data.kernel_rules_cnt; + crackpos += device_param->innerloop_pos + il_pos; + + if (plain_len > data.pw_max) plain_len = data.pw_max; + } + else if (data.attack_mode == ATTACK_MODE_COMBI) + { + pw_t pw; + + gidd_to_pw_t (device_param, gidvid, &pw); + + for (int i = 0; i < 16; i++) + { + plain_buf[i] = pw.i[i]; + } + + plain_len = pw.pw_len; + + char *comb_buf = (char *) device_param->combs_buf[il_pos].i; + uint comb_len = device_param->combs_buf[il_pos].pw_len; + + if (data.combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + memcpy (plain_ptr + plain_len, comb_buf, comb_len); + } + else + { + memmove (plain_ptr + comb_len, plain_ptr, plain_len); + + memcpy (plain_ptr, comb_buf, comb_len); + } + + plain_len += comb_len; + + crackpos += gidvid; + crackpos *= 
data.combs_cnt; + crackpos += device_param->innerloop_pos + il_pos; + + if (data.pw_max != PW_DICTMAX1) + { + if (plain_len > data.pw_max) plain_len = data.pw_max; + } + } + else if (data.attack_mode == ATTACK_MODE_BF) + { + u64 l_off = device_param->kernel_params_mp_l_buf64[3] + gidvid; + u64 r_off = device_param->kernel_params_mp_r_buf64[3] + il_pos; + + uint l_start = device_param->kernel_params_mp_l_buf32[5]; + uint r_start = device_param->kernel_params_mp_r_buf32[5]; + + uint l_stop = device_param->kernel_params_mp_l_buf32[4]; + uint r_stop = device_param->kernel_params_mp_r_buf32[4]; + + sp_exec (l_off, (char *) plain_ptr + l_start, data.root_css_buf, data.markov_css_buf, l_start, l_start + l_stop); + sp_exec (r_off, (char *) plain_ptr + r_start, data.root_css_buf, data.markov_css_buf, r_start, r_start + r_stop); + + plain_len = data.css_cnt; + + crackpos += gidvid; + crackpos *= data.bfs_cnt; + crackpos += device_param->innerloop_pos + il_pos; + } + else if (data.attack_mode == ATTACK_MODE_HYBRID1) + { + pw_t pw; + + gidd_to_pw_t (device_param, gidvid, &pw); + + for (int i = 0; i < 16; i++) + { + plain_buf[i] = pw.i[i]; + } + + plain_len = pw.pw_len; + + u64 off = device_param->kernel_params_mp_buf64[3] + il_pos; + + uint start = 0; + uint stop = device_param->kernel_params_mp_buf32[4]; + + sp_exec (off, (char *) plain_ptr + plain_len, data.root_css_buf, data.markov_css_buf, start, start + stop); + + plain_len += start + stop; + + crackpos += gidvid; + crackpos *= data.combs_cnt; + crackpos += device_param->innerloop_pos + il_pos; + + if (data.pw_max != PW_DICTMAX1) + { + if (plain_len > data.pw_max) plain_len = data.pw_max; + } + } + else if (data.attack_mode == ATTACK_MODE_HYBRID2) + { + pw_t pw; + + gidd_to_pw_t (device_param, gidvid, &pw); + + for (int i = 0; i < 16; i++) + { + plain_buf[i] = pw.i[i]; + } + + plain_len = pw.pw_len; + + u64 off = device_param->kernel_params_mp_buf64[3] + il_pos; + + uint start = 0; + uint stop = 
device_param->kernel_params_mp_buf32[4]; + + memmove (plain_ptr + stop, plain_ptr, plain_len); + + sp_exec (off, (char *) plain_ptr, data.root_css_buf, data.markov_css_buf, start, start + stop); + + plain_len += start + stop; + + crackpos += gidvid; + crackpos *= data.combs_cnt; + crackpos += device_param->innerloop_pos + il_pos; + + if (data.pw_max != PW_DICTMAX1) + { + if (plain_len > data.pw_max) plain_len = data.pw_max; + } + } + + if (data.attack_mode == ATTACK_MODE_BF) + { + if (hashconfig->opti_type & OPTI_TYPE_BRUTE_FORCE) // lots of optimizations can happen here + { + if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH) + { + if (hashconfig->opti_type & OPTI_TYPE_APPENDED_SALT) + { + plain_len = plain_len - data.salts_buf[0].salt_len; + } + } + + if (hashconfig->opts_type & OPTS_TYPE_PT_UNICODE) + { + for (uint i = 0, j = 0; i < plain_len; i += 2, j += 1) + { + plain_ptr[j] = plain_ptr[i]; + } + + plain_len = plain_len / 2; + } + } + } + + // if enabled, update also the potfile + // no need for locking, we're in a mutex protected function + + + if (potfile_ctx->fp != NULL) + { + potfile_write_append (potfile_ctx, out_buf, plain_ptr, plain_len); + } + + // outfile, can be either to file or stdout + // if an error occurs opening the file, send to stdout as fallback + // the fp gets opened for each cracked hash so that the user can modify (move) the outfile while hashcat runs + + + outfile_write_open (outfile_ctx); + + if (outfile_ctx->filename == NULL) if (quiet == 0) clear_prompt (); + + outfile_write (outfile_ctx, out_buf, plain_ptr, plain_len, crackpos, NULL, 0, hashconfig); + + outfile_write_close (outfile_ctx); + + if ((data.wordlist_mode == WL_MODE_FILE) || (data.wordlist_mode == WL_MODE_MASK)) + { + if ((data.devices_status != STATUS_CRACKED) && (data.status != 1)) + { + if (outfile_ctx->filename == NULL) if (quiet == 0) send_prompt (); + } + } + + // if enabled, update also the loopback file + + + if (loopback_ctx->fp != NULL) + { + 
loopback_write_append (loopback_ctx, plain_ptr, plain_len); + } + + // if enabled, update also the (rule) debug file + + if (debugfile_ctx->fp != NULL) + { + // the next check implies that: + // - (data.attack_mode == ATTACK_MODE_STRAIGHT) + // - debug_mode > 0 + + if ((debug_plain_len > 0) || (debug_rule_len > 0)) + { + debugfile_write_append (debugfile_ctx, debug_rule_buf, debug_rule_len, debug_plain_ptr, debug_plain_len, plain_ptr, plain_len); + } + } +} + +int check_cracked (hc_device_param_t *device_param, const uint salt_pos, hashconfig_t *hashconfig) +{ + salt_t *salt_buf = &data.salts_buf[salt_pos]; + + u32 num_cracked; + + cl_int CL_err; + + CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + if (num_cracked) + { + // display hack (for weak hashes etc, it could be that there is still something to clear on the current line) + + log_info_nn (""); + + plain_t *cracked = (plain_t *) mycalloc (num_cracked, sizeof (plain_t)); + + CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + uint cpt_cracked = 0; + + hc_thread_mutex_lock (mux_display); + + for (uint i = 0; i < num_cracked; i++) + { + const uint hash_pos = cracked[i].hash_pos; + + if (data.digests_shown[hash_pos] == 1) continue; + + if ((hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK) == 0) + { + data.digests_shown[hash_pos] = 1; + + data.digests_done++; + + cpt_cracked++; + + salt_buf->digests_done++; + + if (salt_buf->digests_done == salt_buf->digests_cnt) + { + data.salts_shown[salt_pos] = 1; + + data.salts_done++; + } + } + + if 
(data.salts_done == data.salts_cnt) data.devices_status = STATUS_CRACKED; + + check_hash (device_param, &cracked[i]); + } + + hc_thread_mutex_unlock (mux_display); + + myfree (cracked); + + if (cpt_cracked > 0) + { + hc_thread_mutex_lock (mux_display); + + data.cpt_buf[data.cpt_pos].timestamp = time (NULL); + data.cpt_buf[data.cpt_pos].cracked = cpt_cracked; + + data.cpt_pos++; + + data.cpt_total += cpt_cracked; + + if (data.cpt_pos == CPT_BUF) data.cpt_pos = 0; + + hc_thread_mutex_unlock (mux_display); + } + + if (hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK) + { + // we need to reset cracked state on the device + // otherwise host thinks again and again the hash was cracked + // and returns invalid password each time + + memset (data.digests_shown_tmp, 0, salt_buf->digests_cnt * sizeof (uint)); + + CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (uint), salt_buf->digests_cnt * sizeof (uint), &data.digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + + num_cracked = 0; + + CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + + return 0; +} + + diff --git a/src/hashcat.c b/src/hashcat.c index 0ef5c1bda..b3ea7dd23 100644 --- a/src/hashcat.c +++ b/src/hashcat.c @@ -25,6 +25,7 @@ #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "bitops.h" #include "memory.h" @@ -53,7 +54,6 @@ #include "terminal.h" #include "inc_hash_constants.h" #include "shared.h" -#include "interface.h" #include "hwmon.h" #include "mpsp.h" #include "restore.h" @@ -80,7 +80,6 @@ #include 
"debugfile.h" #include "runtime.h" #include "attack_mode.h" -#include "kernel.h" #include "powertune.h" #include "autotune.h" #include "induct.h" @@ -91,6 +90,7 @@ extern int SUPPRESS_OUTPUT; extern hc_thread_mutex_t mux_hwmon; extern hc_thread_mutex_t mux_display; +extern hc_thread_mutex_t mux_counter; extern void (*get_next_word_func) (char *, u32, u32 *, u32 *); @@ -109,11 +109,9 @@ const int comptime = COMPTIME; -// weak_hash -#define WEAK_HASH_THRESHOLD 100 // hash_management -#define USERNAME 0 + int sort_by_digest_p0p1 (const void *v1, const void *v2) { const u32 *d1 = (const u32 *) v1; @@ -211,63 +209,7 @@ int sort_by_hash_no_salt (const void *v1, const void *v2) // attack_mode #define ATTACK_MODE 0 -// kernel -#define KERNEL_ACCEL 0 -#define KERNEL_LOOPS 0 -#define KERNEL_RULES 1024 -#define KERNEL_COMBS 1024 -#define KERNEL_BFS 1024 -#define KERNEL_THREADS_MAX 256 -#define KERNEL_THREADS_MAX_CPU 1 -#define WORKLOAD_PROFILE 2 -#define SCRYPT_TMTO 0 -#define NVIDIA_SPIN_DAMP 100 -static const char OPTI_STR_ZERO_BYTE[] = "Zero-Byte"; -static const char OPTI_STR_PRECOMPUTE_INIT[] = "Precompute-Init"; -static const char OPTI_STR_PRECOMPUTE_MERKLE[] = "Precompute-Merkle-Demgard"; -static const char OPTI_STR_PRECOMPUTE_PERMUT[] = "Precompute-Final-Permutation"; -static const char OPTI_STR_MEET_IN_MIDDLE[] = "Meet-In-The-Middle"; -static const char OPTI_STR_EARLY_SKIP[] = "Early-Skip"; -static const char OPTI_STR_NOT_SALTED[] = "Not-Salted"; -static const char OPTI_STR_NOT_ITERATED[] = "Not-Iterated"; -static const char OPTI_STR_PREPENDED_SALT[] = "Prepended-Salt"; -static const char OPTI_STR_APPENDED_SALT[] = "Appended-Salt"; -static const char OPTI_STR_SINGLE_HASH[] = "Single-Hash"; -static const char OPTI_STR_SINGLE_SALT[] = "Single-Salt"; -static const char OPTI_STR_BRUTE_FORCE[] = "Brute-Force"; -static const char OPTI_STR_RAW_HASH[] = "Raw-Hash"; -static const char OPTI_STR_SLOW_HASH_SIMD[] = "Slow-Hash-SIMD"; -static const char OPTI_STR_USES_BITS_8[] = 
"Uses-8-Bit"; -static const char OPTI_STR_USES_BITS_16[] = "Uses-16-Bit"; -static const char OPTI_STR_USES_BITS_32[] = "Uses-32-Bit"; -static const char OPTI_STR_USES_BITS_64[] = "Uses-64-Bit"; -static char *stroptitype (const uint opti_type) -{ - switch (opti_type) - { - case OPTI_TYPE_ZERO_BYTE: return ((char *) OPTI_STR_ZERO_BYTE); - case OPTI_TYPE_PRECOMPUTE_INIT: return ((char *) OPTI_STR_PRECOMPUTE_INIT); - case OPTI_TYPE_PRECOMPUTE_MERKLE: return ((char *) OPTI_STR_PRECOMPUTE_MERKLE); - case OPTI_TYPE_PRECOMPUTE_PERMUT: return ((char *) OPTI_STR_PRECOMPUTE_PERMUT); - case OPTI_TYPE_MEET_IN_MIDDLE: return ((char *) OPTI_STR_MEET_IN_MIDDLE); - case OPTI_TYPE_EARLY_SKIP: return ((char *) OPTI_STR_EARLY_SKIP); - case OPTI_TYPE_NOT_SALTED: return ((char *) OPTI_STR_NOT_SALTED); - case OPTI_TYPE_NOT_ITERATED: return ((char *) OPTI_STR_NOT_ITERATED); - case OPTI_TYPE_PREPENDED_SALT: return ((char *) OPTI_STR_PREPENDED_SALT); - case OPTI_TYPE_APPENDED_SALT: return ((char *) OPTI_STR_APPENDED_SALT); - case OPTI_TYPE_SINGLE_HASH: return ((char *) OPTI_STR_SINGLE_HASH); - case OPTI_TYPE_SINGLE_SALT: return ((char *) OPTI_STR_SINGLE_SALT); - case OPTI_TYPE_BRUTE_FORCE: return ((char *) OPTI_STR_BRUTE_FORCE); - case OPTI_TYPE_RAW_HASH: return ((char *) OPTI_STR_RAW_HASH); - case OPTI_TYPE_SLOW_HASH_SIMD: return ((char *) OPTI_STR_SLOW_HASH_SIMD); - case OPTI_TYPE_USES_BITS_8: return ((char *) OPTI_STR_USES_BITS_8); - case OPTI_TYPE_USES_BITS_16: return ((char *) OPTI_STR_USES_BITS_16); - case OPTI_TYPE_USES_BITS_32: return ((char *) OPTI_STR_USES_BITS_32); - case OPTI_TYPE_USES_BITS_64: return ((char *) OPTI_STR_USES_BITS_64); - } - return (NULL); -} // powertune #define POWERTUNE_ENABLE 0 @@ -279,21 +221,9 @@ static double TARGET_MS_PROFILE[4] = { 2, 12, 96, 480 }; // thread static hc_thread_mutex_t mux_dispatcher; -static hc_thread_mutex_t mux_counter; - -// restore -static void check_checkpoint () -{ - // if (data.restore_disable == 1) break; (this is already implied 
by previous checks) - u64 words_cur = get_lowest_words_done (); - if (words_cur != data.checkpoint_cur_words) - { - myabort (); - } -} // data @@ -338,1878 +268,290 @@ static void check_checkpoint () -static void check_hash (hc_device_param_t *device_param, plain_t *plain) -{ - debugfile_ctx_t *debugfile_ctx = data.debugfile_ctx; - loopback_ctx_t *loopback_ctx = data.loopback_ctx; - outfile_ctx_t *outfile_ctx = data.outfile_ctx; - potfile_ctx_t *potfile_ctx = data.potfile_ctx; - - uint quiet = data.quiet; - - // debugfile - - u8 debug_rule_buf[BLOCK_SIZE] = { 0 }; - u32 debug_rule_len = 0; // -1 error - - u8 debug_plain_ptr[BLOCK_SIZE] = { 0 }; - u32 debug_plain_len = 0; - - // hash - - char out_buf[HCBUFSIZ_LARGE] = { 0 }; - - const u32 salt_pos = plain->salt_pos; - const u32 digest_pos = plain->digest_pos; // relative - const u32 gidvid = plain->gidvid; - const u32 il_pos = plain->il_pos; - - hashconfig_t *hashconfig = data.hashconfig; - void *digests_buf = data.digests_buf; - salt_t *salts_buf = data.salts_buf; - void *esalts_buf = data.esalts_buf; - hashinfo_t **hash_info = data.hash_info; - char *hashfile = data.hashfile; - - ascii_digest (out_buf, salt_pos, digest_pos, hashconfig, digests_buf, salts_buf, esalts_buf, hash_info, hashfile); - - // plain - - u64 crackpos = device_param->words_off; - - uint plain_buf[16] = { 0 }; - - u8 *plain_ptr = (u8 *) plain_buf; - - unsigned int plain_len = 0; - - if (data.attack_mode == ATTACK_MODE_STRAIGHT) - { - pw_t pw; - - gidd_to_pw_t (device_param, gidvid, &pw); - - for (int i = 0; i < 16; i++) - { - plain_buf[i] = pw.i[i]; - } - - plain_len = pw.pw_len; - - const uint off = device_param->innerloop_pos + il_pos; - - const uint debug_mode = debugfile_ctx->mode; - - if (debug_mode > 0) - { - debug_rule_len = 0; - - // save rule - if ((debug_mode == 1) || (debug_mode == 3) || (debug_mode == 4)) - { - memset (debug_rule_buf, 0, sizeof (debug_rule_buf)); - - debug_rule_len = kernel_rule_to_cpu_rule ((char *) debug_rule_buf, 
&data.kernel_rules_buf[off]); - } - - // save plain - if ((debug_mode == 2) || (debug_mode == 3) || (debug_mode == 4)) - { - memset (debug_plain_ptr, 0, sizeof (debug_plain_ptr)); - - memcpy (debug_plain_ptr, plain_ptr, plain_len); - - debug_plain_len = plain_len; - } - } - - plain_len = apply_rules (data.kernel_rules_buf[off].cmds, &plain_buf[0], &plain_buf[4], plain_len); - - crackpos += gidvid; - crackpos *= data.kernel_rules_cnt; - crackpos += device_param->innerloop_pos + il_pos; - - if (plain_len > data.pw_max) plain_len = data.pw_max; - } - else if (data.attack_mode == ATTACK_MODE_COMBI) - { - pw_t pw; - - gidd_to_pw_t (device_param, gidvid, &pw); - - for (int i = 0; i < 16; i++) - { - plain_buf[i] = pw.i[i]; - } - - plain_len = pw.pw_len; - - char *comb_buf = (char *) device_param->combs_buf[il_pos].i; - uint comb_len = device_param->combs_buf[il_pos].pw_len; - if (data.combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - memcpy (plain_ptr + plain_len, comb_buf, comb_len); - } - else - { - memmove (plain_ptr + comb_len, plain_ptr, plain_len); - memcpy (plain_ptr, comb_buf, comb_len); - } - plain_len += comb_len; +static double try_run (hc_device_param_t *device_param, hashconfig_t *hashconfig, const u32 kernel_accel, const u32 kernel_loops) +{ + const u32 kernel_power_try = device_param->device_processors * device_param->kernel_threads * kernel_accel; - crackpos += gidvid; - crackpos *= data.combs_cnt; - crackpos += device_param->innerloop_pos + il_pos; + device_param->kernel_params_buf32[28] = 0; + device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set + device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes - if (data.pw_max != PW_DICTMAX1) - { - if (plain_len > data.pw_max) plain_len = data.pw_max; - } - } - else if (data.attack_mode == ATTACK_MODE_BF) + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) { - u64 l_off = 
device_param->kernel_params_mp_l_buf64[3] + gidvid; - u64 r_off = device_param->kernel_params_mp_r_buf64[3] + il_pos; - - uint l_start = device_param->kernel_params_mp_l_buf32[5]; - uint r_start = device_param->kernel_params_mp_r_buf32[5]; - - uint l_stop = device_param->kernel_params_mp_l_buf32[4]; - uint r_stop = device_param->kernel_params_mp_r_buf32[4]; - - sp_exec (l_off, (char *) plain_ptr + l_start, data.root_css_buf, data.markov_css_buf, l_start, l_start + l_stop); - sp_exec (r_off, (char *) plain_ptr + r_start, data.root_css_buf, data.markov_css_buf, r_start, r_start + r_stop); - - plain_len = data.css_cnt; - - crackpos += gidvid; - crackpos *= data.bfs_cnt; - crackpos += device_param->innerloop_pos + il_pos; + run_kernel (KERN_RUN_1, device_param, kernel_power_try, true, 0, hashconfig); } - else if (data.attack_mode == ATTACK_MODE_HYBRID1) + else { - pw_t pw; - - gidd_to_pw_t (device_param, gidvid, &pw); - - for (int i = 0; i < 16; i++) - { - plain_buf[i] = pw.i[i]; - } - - plain_len = pw.pw_len; - - u64 off = device_param->kernel_params_mp_buf64[3] + il_pos; - - uint start = 0; - uint stop = device_param->kernel_params_mp_buf32[4]; - - sp_exec (off, (char *) plain_ptr + plain_len, data.root_css_buf, data.markov_css_buf, start, start + stop); - - plain_len += start + stop; - - crackpos += gidvid; - crackpos *= data.combs_cnt; - crackpos += device_param->innerloop_pos + il_pos; - - if (data.pw_max != PW_DICTMAX1) - { - if (plain_len > data.pw_max) plain_len = data.pw_max; - } + run_kernel (KERN_RUN_2, device_param, kernel_power_try, true, 0, hashconfig); } - else if (data.attack_mode == ATTACK_MODE_HYBRID2) - { - pw_t pw; - - gidd_to_pw_t (device_param, gidvid, &pw); - - for (int i = 0; i < 16; i++) - { - plain_buf[i] = pw.i[i]; - } - - plain_len = pw.pw_len; - u64 off = device_param->kernel_params_mp_buf64[3] + il_pos; + const double exec_ms_prev = get_avg_exec_time (device_param, 1); - uint start = 0; - uint stop = 
device_param->kernel_params_mp_buf32[4]; + return exec_ms_prev; +} - memmove (plain_ptr + stop, plain_ptr, plain_len); +static int autotune (hc_device_param_t *device_param, hashconfig_t *hashconfig) +{ + const double target_ms = TARGET_MS_PROFILE[data.workload_profile - 1]; - sp_exec (off, (char *) plain_ptr, data.root_css_buf, data.markov_css_buf, start, start + stop); + const u32 kernel_accel_min = device_param->kernel_accel_min; + const u32 kernel_accel_max = device_param->kernel_accel_max; - plain_len += start + stop; + const u32 kernel_loops_min = device_param->kernel_loops_min; + const u32 kernel_loops_max = device_param->kernel_loops_max; - crackpos += gidvid; - crackpos *= data.combs_cnt; - crackpos += device_param->innerloop_pos + il_pos; + u32 kernel_accel = kernel_accel_min; + u32 kernel_loops = kernel_loops_min; - if (data.pw_max != PW_DICTMAX1) - { - if (plain_len > data.pw_max) plain_len = data.pw_max; - } - } + // in this case the user specified a fixed -u and -n on the commandline + // no way to tune anything + // but we need to run a few caching rounds - if (data.attack_mode == ATTACK_MODE_BF) + if ((kernel_loops_min == kernel_loops_max) && (kernel_accel_min == kernel_accel_max)) { - if (hashconfig->opti_type & OPTI_TYPE_BRUTE_FORCE) // lots of optimizations can happen here + if (hashconfig->hash_mode != 2000) { - if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH) - { - if (hashconfig->opti_type & OPTI_TYPE_APPENDED_SALT) - { - plain_len = plain_len - data.salts_buf[0].salt_len; - } - } - - if (hashconfig->opts_type & OPTS_TYPE_PT_UNICODE) - { - for (uint i = 0, j = 0; i < plain_len; i += 2, j += 1) - { - plain_ptr[j] = plain_ptr[i]; - } - - plain_len = plain_len / 2; - } + try_run (device_param, hashconfig, kernel_accel, kernel_loops); + try_run (device_param, hashconfig, kernel_accel, kernel_loops); + try_run (device_param, hashconfig, kernel_accel, kernel_loops); + try_run (device_param, hashconfig, kernel_accel, kernel_loops); } - } - - // if 
enabled, update also the potfile - // no need for locking, we're in a mutex protected function - - - if (potfile_ctx->fp != NULL) - { - potfile_write_append (potfile_ctx, out_buf, plain_ptr, plain_len); - } - // outfile, can be either to file or stdout - // if an error occurs opening the file, send to stdout as fallback - // the fp gets opened for each cracked hash so that the user can modify (move) the outfile while hashcat runs - - - outfile_write_open (outfile_ctx); - - if (outfile_ctx->filename == NULL) if (quiet == 0) clear_prompt (); + device_param->kernel_accel = kernel_accel; + device_param->kernel_loops = kernel_loops; - outfile_write (outfile_ctx, out_buf, plain_ptr, plain_len, crackpos, NULL, 0, hashconfig); + const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel; - outfile_write_close (outfile_ctx); + device_param->kernel_power = kernel_power; - if ((data.wordlist_mode == WL_MODE_FILE) || (data.wordlist_mode == WL_MODE_MASK)) - { - if ((data.devices_status != STATUS_CRACKED) && (data.status != 1)) - { - if (outfile_ctx->filename == NULL) if (quiet == 0) send_prompt (); - } + return 0; } - // if enabled, update also the loopback file + // from here it's clear we are allowed to autotune + // so let's init some fake words + const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads * kernel_accel_max; - if (loopback_ctx->fp != NULL) + if (data.attack_kern == ATTACK_KERN_BF) { - loopback_write_append (loopback_ctx, plain_ptr, plain_len); + run_kernel_memset (device_param, device_param->d_pws_buf, 7, kernel_power_max * sizeof (pw_t)); } - - // if enabled, update also the (rule) debug file - - if (debugfile_ctx->fp != NULL) + else { - // the next check implies that: - // - (data.attack_mode == ATTACK_MODE_STRAIGHT) - // - debug_mode > 0 - - if ((debug_plain_len > 0) || (debug_rule_len > 0)) + for (u32 i = 0; i < kernel_power_max; i++) { - debugfile_write_append 
(debugfile_ctx, debug_rule_buf, debug_rule_len, debug_plain_ptr, debug_plain_len, plain_ptr, plain_len); + device_param->pws_buf[i].i[0] = i; + device_param->pws_buf[i].i[1] = 0x01234567; + device_param->pws_buf[i].pw_len = 7 + (i & 7); } - } -} - -static int check_cracked (hc_device_param_t *device_param, const uint salt_pos, hashconfig_t *hashconfig) -{ - salt_t *salt_buf = &data.salts_buf[salt_pos]; - - u32 num_cracked; - - cl_int CL_err; - - CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - if (num_cracked) - { - // display hack (for weak hashes etc, it could be that there is still something to clear on the current line) - - log_info_nn (""); - - plain_t *cracked = (plain_t *) mycalloc (num_cracked, sizeof (plain_t)); - CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL); + cl_int CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); if (CL_err != CL_SUCCESS) { - log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); return -1; } + } - uint cpt_cracked = 0; - - hc_thread_mutex_lock (mux_display); - - for (uint i = 0; i < num_cracked; i++) - { - const uint hash_pos = cracked[i].hash_pos; - - if (data.digests_shown[hash_pos] == 1) continue; - - if ((hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK) == 0) - { - data.digests_shown[hash_pos] = 1; - - data.digests_done++; - - cpt_cracked++; - - salt_buf->digests_done++; - - if (salt_buf->digests_done == salt_buf->digests_cnt) - { - data.salts_shown[salt_pos] 
= 1; - - data.salts_done++; - } - } - - if (data.salts_done == data.salts_cnt) data.devices_status = STATUS_CRACKED; - - check_hash (device_param, &cracked[i]); - } - - hc_thread_mutex_unlock (mux_display); - - myfree (cracked); - - if (cpt_cracked > 0) - { - hc_thread_mutex_lock (mux_display); - - data.cpt_buf[data.cpt_pos].timestamp = time (NULL); - data.cpt_buf[data.cpt_pos].cracked = cpt_cracked; - - data.cpt_pos++; - - data.cpt_total += cpt_cracked; - - if (data.cpt_pos == CPT_BUF) data.cpt_pos = 0; - - hc_thread_mutex_unlock (mux_display); - } - - if (hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK) + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (data.kernel_rules_cnt > 1) { - // we need to reset cracked state on the device - // otherwise host thinks again and again the hash was cracked - // and returns invalid password each time - - memset (data.digests_shown_tmp, 0, salt_buf->digests_cnt * sizeof (uint)); - - CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (uint), salt_buf->digests_cnt * sizeof (uint), &data.digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL); + cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL); if (CL_err != CL_SUCCESS) { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); return -1; } - } - - num_cracked = 0; - - CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - - return 0; -} - -static void save_hash () -{ - hashconfig_t 
*hashconfig = data.hashconfig; - void *digests_buf = data.digests_buf; - salt_t *salts_buf = data.salts_buf; - void *esalts_buf = data.esalts_buf; - hashinfo_t **hash_info = data.hash_info; - char *hashfile = data.hashfile; - - char new_hashfile[256] = { 0 }; - char old_hashfile[256] = { 0 }; - - snprintf (new_hashfile, 255, "%s.new", hashfile); - snprintf (old_hashfile, 255, "%s.old", hashfile); - - unlink (new_hashfile); - - char separator = hashconfig->separator; - - FILE *fp = fopen (new_hashfile, "wb"); - - if (fp == NULL) - { - log_error ("ERROR: %s: %s", new_hashfile, strerror (errno)); - - exit (-1); - } - - for (uint salt_pos = 0; salt_pos < data.salts_cnt; salt_pos++) - { - if (data.salts_shown[salt_pos] == 1) continue; - - salt_t *salt_buf = &data.salts_buf[salt_pos]; - - for (uint digest_pos = 0; digest_pos < salt_buf->digests_cnt; digest_pos++) - { - uint idx = salt_buf->digests_offset + digest_pos; - - if (data.digests_shown[idx] == 1) continue; - - if (hashconfig->hash_mode != 2500) - { - if (data.username == 1) - { - user_t *user = data.hash_info[idx]->user; - - uint i; - - for (i = 0; i < user->user_len; i++) fputc (user->user_name[i], fp); - - fputc (separator, fp); - } - - char out_buf[HCBUFSIZ_LARGE]; // scratch buffer - - out_buf[0] = 0; - - ascii_digest (out_buf, salt_pos, digest_pos, hashconfig, digests_buf, salts_buf, esalts_buf, hash_info, hashfile); - - fputs (out_buf, fp); - - fputc ('\n', fp); - } - else - { - hccap_t hccap; - - to_hccap_t (&hccap, salt_pos, digest_pos, hashconfig, digests_buf, salts_buf, esalts_buf); - - fwrite (&hccap, sizeof (hccap_t), 1, fp); - } - } - } - - fflush (fp); - - fclose (fp); - - unlink (old_hashfile); - - if (rename (hashfile, old_hashfile) != 0) - { - log_error ("ERROR: Rename file '%s' to '%s': %s", hashfile, old_hashfile, strerror (errno)); - - exit (-1); - } - - unlink (hashfile); - - if (rename (new_hashfile, hashfile) != 0) - { - log_error ("ERROR: Rename file '%s' to '%s': %s", new_hashfile, 
hashfile, strerror (errno)); - - exit (-1); - } - - unlink (old_hashfile); -} - -static int run_kernel (const uint kern_run, hc_device_param_t *device_param, const uint num, const uint event_update, const uint iteration, hashconfig_t *hashconfig) -{ - cl_int CL_err = CL_SUCCESS; - - uint num_elements = num; - - device_param->kernel_params_buf32[33] = data.combs_mode; - device_param->kernel_params_buf32[34] = num; - - uint kernel_threads = device_param->kernel_threads; - - while (num_elements % kernel_threads) num_elements++; - - cl_kernel kernel = NULL; - - switch (kern_run) - { - case KERN_RUN_1: kernel = device_param->kernel1; break; - case KERN_RUN_12: kernel = device_param->kernel12; break; - case KERN_RUN_2: kernel = device_param->kernel2; break; - case KERN_RUN_23: kernel = device_param->kernel23; break; - case KERN_RUN_3: kernel = device_param->kernel3; break; - } - - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 24, sizeof (cl_uint), device_param->kernel_params[24]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 25, sizeof (cl_uint), device_param->kernel_params[25]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 26, sizeof (cl_uint), device_param->kernel_params[26]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 27, sizeof (cl_uint), device_param->kernel_params[27]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 28, sizeof (cl_uint), device_param->kernel_params[28]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 29, sizeof (cl_uint), device_param->kernel_params[29]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 30, sizeof (cl_uint), device_param->kernel_params[30]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 31, sizeof (cl_uint), device_param->kernel_params[31]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 32, sizeof (cl_uint), device_param->kernel_params[32]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 33, sizeof (cl_uint), device_param->kernel_params[33]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 34, sizeof 
(cl_uint), device_param->kernel_params[34]); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - cl_event event; - - if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (data.attack_mode == ATTACK_MODE_BF)) - { - const size_t global_work_size[3] = { num_elements, 32, 1 }; - const size_t local_work_size[3] = { kernel_threads / 32, 32, 1 }; - - CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - else - { - if (kern_run == KERN_RUN_2) - { - if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD) - { - num_elements = CEIL (num_elements / device_param->vector_width); - } - } - - while (num_elements % kernel_threads) num_elements++; - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - - CL_err = hc_clFlush (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - if (device_param->nvidia_spin_damp > 0) - { - if (data.devices_status == STATUS_RUNNING) - { - if (iteration < EXPECTED_ITERATIONS) - { - switch (kern_run) - { - case KERN_RUN_1: if (device_param->exec_us_prev1[iteration] > 0) usleep ((useconds_t)(device_param->exec_us_prev1[iteration] * device_param->nvidia_spin_damp)); break; - case KERN_RUN_2: if (device_param->exec_us_prev2[iteration] > 0) usleep ((useconds_t)(device_param->exec_us_prev2[iteration] * 
device_param->nvidia_spin_damp)); break; - case KERN_RUN_3: if (device_param->exec_us_prev3[iteration] > 0) usleep ((useconds_t)(device_param->exec_us_prev3[iteration] * device_param->nvidia_spin_damp)); break; - } - } - } - } - - CL_err = hc_clWaitForEvents (data.ocl, 1, &event); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clWaitForEvents(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - cl_ulong time_start; - cl_ulong time_end; - - CL_err |= hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); - CL_err |= hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clGetEventProfilingInfo(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - const double exec_us = (double) (time_end - time_start) / 1000; - - if (data.devices_status == STATUS_RUNNING) - { - if (iteration < EXPECTED_ITERATIONS) - { - switch (kern_run) - { - case KERN_RUN_1: device_param->exec_us_prev1[iteration] = exec_us; break; - case KERN_RUN_2: device_param->exec_us_prev2[iteration] = exec_us; break; - case KERN_RUN_3: device_param->exec_us_prev3[iteration] = exec_us; break; - } - } - } - - if (event_update) - { - uint exec_pos = device_param->exec_pos; - - device_param->exec_ms[exec_pos] = exec_us / 1000; - - exec_pos++; - - if (exec_pos == EXEC_CACHE) - { - exec_pos = 0; - } - - device_param->exec_pos = exec_pos; - } - - CL_err = hc_clReleaseEvent (data.ocl, event); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clReleaseEvent(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFinish (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - return 0; -} - -static int run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num) -{ - cl_int CL_err = CL_SUCCESS; - - 
uint num_elements = num; - - switch (kern_run) - { - case KERN_RUN_MP: device_param->kernel_params_mp_buf32[8] = num; break; - case KERN_RUN_MP_R: device_param->kernel_params_mp_r_buf32[8] = num; break; - case KERN_RUN_MP_L: device_param->kernel_params_mp_l_buf32[9] = num; break; - } - - // causes problems with special threads like in bcrypt - // const uint kernel_threads = device_param->kernel_threads; - - uint kernel_threads = device_param->kernel_threads; - - while (num_elements % kernel_threads) num_elements++; - - cl_kernel kernel = NULL; - - switch (kern_run) - { - case KERN_RUN_MP: kernel = device_param->kernel_mp; break; - case KERN_RUN_MP_R: kernel = device_param->kernel_mp_r; break; - case KERN_RUN_MP_L: kernel = device_param->kernel_mp_l; break; - } - - switch (kern_run) - { - case KERN_RUN_MP: CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp[4]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp[5]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp[6]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp[7]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint), device_param->kernel_params_mp[8]); - break; - case KERN_RUN_MP_R: CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_r[4]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_r[5]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_r[6]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint), 
device_param->kernel_params_mp_r[7]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint), device_param->kernel_params_mp_r[8]); - break; - case KERN_RUN_MP_L: CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_l[3]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_l[4]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_l[5]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_l[6]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp_l[7]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint), device_param->kernel_params_mp_l[8]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 9, sizeof (cl_uint), device_param->kernel_params_mp_l[9]); - break; - } - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFlush (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFinish (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - return 0; -} - -static int run_kernel_tm (hc_device_param_t *device_param) -{ - cl_int CL_err = CL_SUCCESS; - - const uint num_elements = 1024; // fixed - - uint 
kernel_threads = 32; - - cl_kernel kernel = device_param->kernel_tm; - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFlush (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFinish (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - return 0; -} - -static int run_kernel_amp (hc_device_param_t *device_param, const uint num) -{ - cl_int CL_err = CL_SUCCESS; - - uint num_elements = num; - - device_param->kernel_params_amp_buf32[5] = data.combs_mode; - device_param->kernel_params_amp_buf32[6] = num_elements; - - // causes problems with special threads like in bcrypt - // const uint kernel_threads = device_param->kernel_threads; - - uint kernel_threads = device_param->kernel_threads; - - while (num_elements % kernel_threads) num_elements++; - - cl_kernel kernel = device_param->kernel_amp; - - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_amp[6]); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, 
NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFlush (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFinish (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - return 0; -} - -static int run_kernel_memset (hc_device_param_t *device_param, cl_mem buf, const uint value, const uint num) -{ - cl_int CL_err = CL_SUCCESS; - - const u32 num16d = num / 16; - const u32 num16m = num % 16; - - if (num16d) - { - device_param->kernel_params_memset_buf32[1] = value; - device_param->kernel_params_memset_buf32[2] = num16d; - - uint kernel_threads = device_param->kernel_threads; - - uint num_elements = num16d; - - while (num_elements % kernel_threads) num_elements++; - - cl_kernel kernel = device_param->kernel_memset; - - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 0, sizeof (cl_mem), (void *) &buf); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); - CL_err |= hc_clSetKernelArg (data.ocl, kernel, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - const size_t global_work_size[3] = { num_elements, 1, 1 }; - const size_t local_work_size[3] = { kernel_threads, 1, 1 }; - - CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFlush (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - 
log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - CL_err = hc_clFinish (data.ocl, device_param->command_queue); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - - if (num16m) - { - u32 tmp[4]; - - tmp[0] = value; - tmp[1] = value; - tmp[2] = value; - tmp[3] = value; - - CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - - return 0; -} - -static int run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size) -{ - return run_kernel_memset (device_param, buf, 0, size); -} - -static int choose_kernel (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt, const uint fast_iteration) -{ - cl_int CL_err = CL_SUCCESS; - - if (hashconfig->hash_mode == 2000) - { - process_stdout (device_param, pws_cnt); - - return 0; - } - - if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - if (attack_mode == ATTACK_MODE_BF) - { - if (opts_type & OPTS_TYPE_PT_BITSLICE) - { - const uint size_tm = 32 * sizeof (bs_word_t); - - run_kernel_bzero (device_param, device_param->d_tm_c, size_tm); - - run_kernel_tm (device_param); - - CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - } - - if (highest_pw_len < 16) - { - run_kernel (KERN_RUN_1, device_param, pws_cnt, true, fast_iteration, hashconfig); - } - else if (highest_pw_len < 32) - { - run_kernel (KERN_RUN_2, device_param, pws_cnt, true, 
fast_iteration, hashconfig); - } - else - { - run_kernel (KERN_RUN_3, device_param, pws_cnt, true, fast_iteration, hashconfig); - } - } - else - { - run_kernel_amp (device_param, pws_cnt); - - run_kernel (KERN_RUN_1, device_param, pws_cnt, false, 0, hashconfig); - - if (opts_type & OPTS_TYPE_HOOK12) - { - run_kernel (KERN_RUN_12, device_param, pws_cnt, false, 0, hashconfig); - - CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - // do something with data - - CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - - uint iter = salt_buf->salt_iter; - - uint loop_step = device_param->kernel_loops; - - for (uint loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++) - { - uint loop_left = iter - loop_pos; - - loop_left = MIN (loop_left, loop_step); - - device_param->kernel_params_buf32[28] = loop_pos; - device_param->kernel_params_buf32[29] = loop_left; - - run_kernel (KERN_RUN_2, device_param, pws_cnt, true, slow_iteration, hashconfig); - - if (data.devices_status == STATUS_CRACKED) break; - if (data.devices_status == STATUS_ABORTED) break; - if (data.devices_status == STATUS_QUIT) break; - if (data.devices_status == STATUS_BYPASS) break; - - /** - * speed - */ - - const float iter_part = (float) (loop_pos + loop_left) / iter; - - const u64 perf_sum_all = (u64) (pws_cnt * iter_part); - - double speed_ms; - - hc_timer_get (device_param->timer_speed, speed_ms); - - const u32 speed_pos = device_param->speed_pos; - - 
device_param->speed_cnt[speed_pos] = perf_sum_all; - - device_param->speed_ms[speed_pos] = speed_ms; - - if (data.benchmark == 1) - { - if (speed_ms > 4096) data.devices_status = STATUS_ABORTED; - } - } - - if (opts_type & OPTS_TYPE_HOOK23) - { - run_kernel (KERN_RUN_23, device_param, pws_cnt, false, 0, hashconfig); - - CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - - // do something with data - - CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - - run_kernel (KERN_RUN_3, device_param, pws_cnt, false, 0, hashconfig); - } - - return 0; -} - -static int run_copy (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint pws_cnt) -{ - cl_int CL_err = CL_SUCCESS; - - if (data.attack_kern == ATTACK_KERN_STRAIGHT) - { - CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - else if (data.attack_kern == ATTACK_KERN_COMBI) - { - if (data.attack_mode == ATTACK_MODE_COMBI) - { - if (data.combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) - { - for (u32 i = 0; i < pws_cnt; i++) - { - const u32 pw_len = device_param->pws_buf[i].pw_len; - - u8 *ptr = (u8 *) device_param->pws_buf[i].i; - - ptr[pw_len] = 0x01; - } - } - else if (hashconfig->opts_type & 
OPTS_TYPE_PT_ADD80) - { - for (u32 i = 0; i < pws_cnt; i++) - { - const u32 pw_len = device_param->pws_buf[i].pw_len; - - u8 *ptr = (u8 *) device_param->pws_buf[i].i; - - ptr[pw_len] = 0x80; - } - } - } - } - else if (data.attack_mode == ATTACK_MODE_HYBRID2) - { - if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) - { - for (u32 i = 0; i < pws_cnt; i++) - { - const u32 pw_len = device_param->pws_buf[i].pw_len; - - u8 *ptr = (u8 *) device_param->pws_buf[i].i; - - ptr[pw_len] = 0x01; - } - } - else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) - { - for (u32 i = 0; i < pws_cnt; i++) - { - const u32 pw_len = device_param->pws_buf[i].pw_len; - - u8 *ptr = (u8 *) device_param->pws_buf[i].i; - - ptr[pw_len] = 0x80; - } - } - } - - CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - else if (data.attack_kern == ATTACK_KERN_BF) - { - const u64 off = device_param->words_off; - - device_param->kernel_params_mp_l_buf64[3] = off; - - run_kernel_mp (KERN_RUN_MP_L, device_param, pws_cnt); - } - - return 0; -} - -static double try_run (hc_device_param_t *device_param, hashconfig_t *hashconfig, const u32 kernel_accel, const u32 kernel_loops) -{ - const u32 kernel_power_try = device_param->device_processors * device_param->kernel_threads * kernel_accel; - - device_param->kernel_params_buf32[28] = 0; - device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set - device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes - - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - run_kernel (KERN_RUN_1, device_param, kernel_power_try, true, 0, hashconfig); - } - else - { - run_kernel (KERN_RUN_2, device_param, kernel_power_try, 
true, 0, hashconfig); - } - - const double exec_ms_prev = get_avg_exec_time (device_param, 1); - - return exec_ms_prev; -} - -static int autotune (hc_device_param_t *device_param, hashconfig_t *hashconfig) -{ - const double target_ms = TARGET_MS_PROFILE[data.workload_profile - 1]; - - const u32 kernel_accel_min = device_param->kernel_accel_min; - const u32 kernel_accel_max = device_param->kernel_accel_max; - - const u32 kernel_loops_min = device_param->kernel_loops_min; - const u32 kernel_loops_max = device_param->kernel_loops_max; - - u32 kernel_accel = kernel_accel_min; - u32 kernel_loops = kernel_loops_min; - - // in this case the user specified a fixed -u and -n on the commandline - // no way to tune anything - // but we need to run a few caching rounds - - if ((kernel_loops_min == kernel_loops_max) && (kernel_accel_min == kernel_accel_max)) - { - if (hashconfig->hash_mode != 2000) - { - try_run (device_param, hashconfig, kernel_accel, kernel_loops); - try_run (device_param, hashconfig, kernel_accel, kernel_loops); - try_run (device_param, hashconfig, kernel_accel, kernel_loops); - try_run (device_param, hashconfig, kernel_accel, kernel_loops); - } - - device_param->kernel_accel = kernel_accel; - device_param->kernel_loops = kernel_loops; - - const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel; - - device_param->kernel_power = kernel_power; - - return 0; - } - - // from here it's clear we are allowed to autotune - // so let's init some fake words - - const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads * kernel_accel_max; - - if (data.attack_kern == ATTACK_KERN_BF) - { - run_kernel_memset (device_param, device_param->d_pws_buf, 7, kernel_power_max * sizeof (pw_t)); - } - else - { - for (u32 i = 0; i < kernel_power_max; i++) - { - device_param->pws_buf[i].i[0] = i; - device_param->pws_buf[i].i[1] = 0x01234567; - device_param->pws_buf[i].pw_len = 7 + (i & 7); - 
} - - cl_int CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - if (data.kernel_rules_cnt > 1) - { - cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL); - - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); - - return -1; - } - } - } - else - { - run_kernel_amp (device_param, kernel_power_max); - } - - #define VERIFIER_CNT 1 - - // first find out highest kernel-loops that stays below target_ms - - if (kernel_loops_min < kernel_loops_max) - { - for (kernel_loops = kernel_loops_max; kernel_loops > kernel_loops_min; kernel_loops >>= 1) - { - double exec_ms = try_run (device_param, hashconfig, kernel_accel_min, kernel_loops); - - for (int i = 0; i < VERIFIER_CNT; i++) - { - double exec_ms_v = try_run (device_param, hashconfig, kernel_accel_min, kernel_loops); - - exec_ms = MIN (exec_ms, exec_ms_v); - } - - if (exec_ms < target_ms) break; - } - } - - // now the same for kernel-accel but with the new kernel-loops from previous loop set - - #define STEPS_CNT 10 - - if (kernel_accel_min < kernel_accel_max) - { - for (int i = 0; i < STEPS_CNT; i++) - { - const u32 kernel_accel_try = 1u << i; - - if (kernel_accel_try < kernel_accel_min) continue; - if (kernel_accel_try > kernel_accel_max) break; - - double exec_ms = try_run (device_param, hashconfig, kernel_accel_try, kernel_loops); - - for (int i = 0; i < VERIFIER_CNT; i++) - { - double exec_ms_v = try_run (device_param, hashconfig, kernel_accel_try, kernel_loops); - - exec_ms = MIN 
(exec_ms, exec_ms_v); - } - - if (exec_ms > target_ms) break; - - kernel_accel = kernel_accel_try; - } - } - - // at this point we want to know the actual runtime for the following reason: - // we need a reference for the balancing loop following up, and this - // the balancing loop can have an effect that the creates a new opportunity, for example: - // if the target is 95 ms and the current runtime is 48ms the above loop - // stopped the execution because the previous exec_ms was > 95ms - // due to the rebalance it's possible that the runtime reduces from 48ms to 47ms - // and this creates the possibility to double the workload -> 47 * 2 = 95ms, which is < 96ms - - double exec_ms_pre_final = try_run (device_param, hashconfig, kernel_accel, kernel_loops); - - for (int i = 0; i < VERIFIER_CNT; i++) - { - double exec_ms_pre_final_v = try_run (device_param, hashconfig, kernel_accel, kernel_loops); - - exec_ms_pre_final = MIN (exec_ms_pre_final, exec_ms_pre_final_v); - } - - u32 diff = kernel_loops - kernel_accel; - - if ((kernel_loops_min < kernel_loops_max) && (kernel_accel_min < kernel_accel_max)) - { - u32 kernel_accel_orig = kernel_accel; - u32 kernel_loops_orig = kernel_loops; - - for (u32 f = 1; f < 1024; f++) - { - const u32 kernel_accel_try = kernel_accel_orig * f; - const u32 kernel_loops_try = kernel_loops_orig / f; - - if (kernel_accel_try > kernel_accel_max) break; - if (kernel_loops_try < kernel_loops_min) break; - - u32 diff_new = kernel_loops_try - kernel_accel_try; - - if (diff_new > diff) break; - - diff_new = diff; - - double exec_ms = try_run (device_param, hashconfig, kernel_accel_try, kernel_loops_try); - - for (int i = 0; i < VERIFIER_CNT; i++) - { - double exec_ms_v = try_run (device_param, hashconfig, kernel_accel_try, kernel_loops_try); - - exec_ms = MIN (exec_ms, exec_ms_v); - } - - if (exec_ms < exec_ms_pre_final) - { - exec_ms_pre_final = exec_ms; - - kernel_accel = kernel_accel_try; - kernel_loops = kernel_loops_try; - } - } - } - - const 
double exec_left = target_ms / exec_ms_pre_final; - - const double accel_left = kernel_accel_max / kernel_accel; - - const double exec_accel_min = MIN (exec_left, accel_left); // we want that to be int - - if (exec_accel_min >= 1.0) - { - // this is safe to not overflow kernel_accel_max because of accel_left - - kernel_accel *= (u32) exec_accel_min; - } - - // reset them fake words - - /* - memset (device_param->pws_buf, 0, kernel_power_max * sizeof (pw_t)); - - hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); - hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); - */ - - run_kernel_memset (device_param, device_param->d_pws_buf, 0, kernel_power_max * sizeof (pw_t)); - - if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL) - { - run_kernel_memset (device_param, device_param->d_pws_amp_buf, 0, kernel_power_max * sizeof (pw_t)); - } - - // reset timer - - device_param->exec_pos = 0; - - memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double)); - - memset (device_param->exec_us_prev1, 0, EXPECTED_ITERATIONS * sizeof (double)); - memset (device_param->exec_us_prev2, 0, EXPECTED_ITERATIONS * sizeof (double)); - memset (device_param->exec_us_prev3, 0, EXPECTED_ITERATIONS * sizeof (double)); - - // store - - device_param->kernel_accel = kernel_accel; - device_param->kernel_loops = kernel_loops; - - const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel; - - device_param->kernel_power = kernel_power; - - #if defined (DEBUG) - - if (data.quiet == 0) - { - clear_prompt (); - - log_info ("- Device #%u: autotuned kernel-accel to %u\n" - "- Device #%u: autotuned kernel-loops to %u\n", - device_param->device_id + 1, kernel_accel, - device_param->device_id + 1, 
kernel_loops); - - send_prompt (); - } - - #endif - - return 0; -} - -static int run_cracker (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint pws_cnt) -{ - char *line_buf = (char *) mymalloc (HCBUFSIZ_LARGE); - - // init speed timer - - uint speed_pos = device_param->speed_pos; - - #if defined (_POSIX) - if (device_param->timer_speed.tv_sec == 0) - { - hc_timer_set (&device_param->timer_speed); - } - #endif - - #if defined (_WIN) - if (device_param->timer_speed.QuadPart == 0) - { - hc_timer_set (&device_param->timer_speed); - } - #endif - - // find higest password length, this is for optimization stuff - - uint highest_pw_len = 0; - - if (data.attack_kern == ATTACK_KERN_STRAIGHT) - { - } - else if (data.attack_kern == ATTACK_KERN_COMBI) - { - } - else if (data.attack_kern == ATTACK_KERN_BF) - { - highest_pw_len = device_param->kernel_params_mp_l_buf32[4] - + device_param->kernel_params_mp_l_buf32[5]; - } - - // iteration type - - uint innerloop_step = 0; - uint innerloop_cnt = 0; - - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) innerloop_step = device_param->kernel_loops; - else innerloop_step = 1; - - if (data.attack_kern == ATTACK_KERN_STRAIGHT) innerloop_cnt = data.kernel_rules_cnt; - else if (data.attack_kern == ATTACK_KERN_COMBI) innerloop_cnt = data.combs_cnt; - else if (data.attack_kern == ATTACK_KERN_BF) innerloop_cnt = data.bfs_cnt; - - // loop start: most outer loop = salt iteration, then innerloops (if multi) - - for (uint salt_pos = 0; salt_pos < data.salts_cnt; salt_pos++) - { - while (data.devices_status == STATUS_PAUSED) hc_sleep (1); - - if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint (); - - if (data.devices_status == STATUS_CRACKED) break; - if (data.devices_status == STATUS_ABORTED) break; - if (data.devices_status == STATUS_QUIT) break; - if (data.devices_status == STATUS_BYPASS) break; - - salt_t *salt_buf = &data.salts_buf[salt_pos]; - - device_param->kernel_params_buf32[27] = salt_pos; 
- device_param->kernel_params_buf32[31] = salt_buf->digests_cnt; - device_param->kernel_params_buf32[32] = salt_buf->digests_offset; - - FILE *combs_fp = device_param->combs_fp; - - if (data.attack_mode == ATTACK_MODE_COMBI) - { - rewind (combs_fp); - } - - // innerloops - - for (uint innerloop_pos = 0; innerloop_pos < innerloop_cnt; innerloop_pos += innerloop_step) - { - while (data.devices_status == STATUS_PAUSED) hc_sleep (1); - - if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint (); - - if (data.devices_status == STATUS_CRACKED) break; - if (data.devices_status == STATUS_ABORTED) break; - if (data.devices_status == STATUS_QUIT) break; - if (data.devices_status == STATUS_BYPASS) break; - - uint fast_iteration = 0; - - uint innerloop_left = innerloop_cnt - innerloop_pos; - - if (innerloop_left > innerloop_step) - { - innerloop_left = innerloop_step; - - fast_iteration = 1; - } - - device_param->innerloop_pos = innerloop_pos; - device_param->innerloop_left = innerloop_left; - - device_param->kernel_params_buf32[30] = innerloop_left; - - // i think we can get rid of this - if (innerloop_left == 0) - { - puts ("bug, how should this happen????\n"); - - continue; - } - - if (data.salts_shown[salt_pos] == 1) - { - data.words_progress_done[salt_pos] += (u64) pws_cnt * (u64) innerloop_left; - - continue; - } - - // initialize amplifiers - - if (data.attack_mode == ATTACK_MODE_COMBI) - { - uint i = 0; - - while (i < innerloop_left) - { - if (feof (combs_fp)) break; - - int line_len = fgetl (combs_fp, line_buf); - - if (line_len >= PW_MAX1) continue; - - line_len = convert_from_hex (line_buf, line_len); - - char *line_buf_new = line_buf; - - if (run_rule_engine (data.rule_len_r, data.rule_buf_r)) - { - char rule_buf_out[BLOCK_SIZE] = { 0 }; - - int rule_len_out = _old_apply_rule (data.rule_buf_r, data.rule_len_r, line_buf, line_len, rule_buf_out); - - if (rule_len_out < 0) - { - data.words_progress_rejected[salt_pos] += pws_cnt; - - continue; - } - - 
line_len = rule_len_out; - - line_buf_new = rule_buf_out; - } + } + } + else + { + run_kernel_amp (device_param, kernel_power_max); + } - line_len = MIN (line_len, PW_DICTMAX); + #define VERIFIER_CNT 1 - u8 *ptr = (u8 *) device_param->combs_buf[i].i; + // first find out highest kernel-loops that stays below target_ms - memcpy (ptr, line_buf_new, line_len); + if (kernel_loops_min < kernel_loops_max) + { + for (kernel_loops = kernel_loops_max; kernel_loops > kernel_loops_min; kernel_loops >>= 1) + { + double exec_ms = try_run (device_param, hashconfig, kernel_accel_min, kernel_loops); - memset (ptr + line_len, 0, PW_DICTMAX1 - line_len); + for (int i = 0; i < VERIFIER_CNT; i++) + { + double exec_ms_v = try_run (device_param, hashconfig, kernel_accel_min, kernel_loops); - if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER) - { - uppercase (ptr, line_len); - } + exec_ms = MIN (exec_ms, exec_ms_v); + } - if (data.combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) - { - ptr[line_len] = 0x80; - } + if (exec_ms < target_ms) break; + } + } - if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) - { - ptr[line_len] = 0x01; - } - } + // now the same for kernel-accel but with the new kernel-loops from previous loop set - device_param->combs_buf[i].pw_len = line_len; + #define STEPS_CNT 10 - i++; - } + if (kernel_accel_min < kernel_accel_max) + { + for (int i = 0; i < STEPS_CNT; i++) + { + const u32 kernel_accel_try = 1u << i; - for (uint j = i; j < innerloop_left; j++) - { - device_param->combs_buf[j].i[0] = 0; - device_param->combs_buf[j].i[1] = 0; - device_param->combs_buf[j].i[2] = 0; - device_param->combs_buf[j].i[3] = 0; - device_param->combs_buf[j].i[4] = 0; - device_param->combs_buf[j].i[5] = 0; - device_param->combs_buf[j].i[6] = 0; - device_param->combs_buf[j].i[7] = 0; + if (kernel_accel_try < kernel_accel_min) continue; + if (kernel_accel_try > kernel_accel_max) break; - device_param->combs_buf[j].pw_len = 0; - } + double exec_ms = 
try_run (device_param, hashconfig, kernel_accel_try, kernel_loops); - innerloop_left = i; - } - else if (data.attack_mode == ATTACK_MODE_BF) + for (int i = 0; i < VERIFIER_CNT; i++) { - u64 off = innerloop_pos; - - device_param->kernel_params_mp_r_buf64[3] = off; + double exec_ms_v = try_run (device_param, hashconfig, kernel_accel_try, kernel_loops); - run_kernel_mp (KERN_RUN_MP_R, device_param, innerloop_left); + exec_ms = MIN (exec_ms, exec_ms_v); } - else if (data.attack_mode == ATTACK_MODE_HYBRID1) - { - u64 off = innerloop_pos; - device_param->kernel_params_mp_buf64[3] = off; + if (exec_ms > target_ms) break; - run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left); - } - else if (data.attack_mode == ATTACK_MODE_HYBRID2) - { - u64 off = innerloop_pos; + kernel_accel = kernel_accel_try; + } + } - device_param->kernel_params_mp_buf64[3] = off; + // at this point we want to know the actual runtime for the following reason: + // we need a reference for the balancing loop following up, and this + // the balancing loop can have an effect that the creates a new opportunity, for example: + // if the target is 95 ms and the current runtime is 48ms the above loop + // stopped the execution because the previous exec_ms was > 95ms + // due to the rebalance it's possible that the runtime reduces from 48ms to 47ms + // and this creates the possibility to double the workload -> 47 * 2 = 95ms, which is < 96ms - run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left); - } + double exec_ms_pre_final = try_run (device_param, hashconfig, kernel_accel, kernel_loops); - // copy amplifiers + for (int i = 0; i < VERIFIER_CNT; i++) + { + double exec_ms_pre_final_v = try_run (device_param, hashconfig, kernel_accel, kernel_loops); - if (data.attack_mode == ATTACK_MODE_STRAIGHT) - { - cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof 
(kernel_rule_t), 0, NULL, NULL); + exec_ms_pre_final = MIN (exec_ms_pre_final, exec_ms_pre_final_v); + } - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + u32 diff = kernel_loops - kernel_accel; - return -1; - } - } - else if (data.attack_mode == ATTACK_MODE_COMBI) - { - cl_int CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (comb_t), device_param->combs_buf, 0, NULL, NULL); + if ((kernel_loops_min < kernel_loops_max) && (kernel_accel_min < kernel_accel_max)) + { + u32 kernel_accel_orig = kernel_accel; + u32 kernel_loops_orig = kernel_loops; - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + for (u32 f = 1; f < 1024; f++) + { + const u32 kernel_accel_try = kernel_accel_orig * f; + const u32 kernel_loops_try = kernel_loops_orig / f; - return -1; - } - } - else if (data.attack_mode == ATTACK_MODE_BF) - { - cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_bfs, device_param->d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL); + if (kernel_accel_try > kernel_accel_max) break; + if (kernel_loops_try < kernel_loops_min) break; - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + u32 diff_new = kernel_loops_try - kernel_accel_try; - return -1; - } - } - else if (data.attack_mode == ATTACK_MODE_HYBRID1) - { - cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL); + if (diff_new > diff) break; - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + diff_new = diff; - return -1; - } - } - else if (data.attack_mode == ATTACK_MODE_HYBRID2) - { - cl_int CL_err = hc_clEnqueueCopyBuffer 
(data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL); + double exec_ms = try_run (device_param, hashconfig, kernel_accel_try, kernel_loops_try); - if (CL_err != CL_SUCCESS) - { - log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + for (int i = 0; i < VERIFIER_CNT; i++) + { + double exec_ms_v = try_run (device_param, hashconfig, kernel_accel_try, kernel_loops_try); - return -1; - } + exec_ms = MIN (exec_ms, exec_ms_v); } - if (data.benchmark == 1) + if (exec_ms < exec_ms_pre_final) { - hc_timer_set (&device_param->timer_speed); - } - - int rc = choose_kernel (device_param, hashconfig, hashconfig->attack_exec, data.attack_mode, hashconfig->opts_type, salt_buf, highest_pw_len, pws_cnt, fast_iteration); + exec_ms_pre_final = exec_ms; - if (rc == -1) return -1; + kernel_accel = kernel_accel_try; + kernel_loops = kernel_loops_try; + } + } + } - if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint (); + const double exec_left = target_ms / exec_ms_pre_final; - if (data.devices_status == STATUS_CRACKED) break; - if (data.devices_status == STATUS_ABORTED) break; - if (data.devices_status == STATUS_QUIT) break; - if (data.devices_status == STATUS_BYPASS) break; + const double accel_left = kernel_accel_max / kernel_accel; - /** - * result - */ + const double exec_accel_min = MIN (exec_left, accel_left); // we want that to be int - if (data.benchmark == 0) - { - check_cracked (device_param, salt_pos, hashconfig); - } + if (exec_accel_min >= 1.0) + { + // this is safe to not overflow kernel_accel_max because of accel_left - /** - * progress - */ + kernel_accel *= (u32) exec_accel_min; + } - u64 perf_sum_all = (u64) pws_cnt * (u64) innerloop_left; + // reset them fake words - hc_thread_mutex_lock (mux_counter); + /* + memset (device_param->pws_buf, 0, kernel_power_max * sizeof (pw_t)); - data.words_progress_done[salt_pos] += perf_sum_all; + 
hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_amp_buf, CL_TRUE, 0, kernel_power_max * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + */ - hc_thread_mutex_unlock (mux_counter); + run_kernel_memset (device_param, device_param->d_pws_buf, 0, kernel_power_max * sizeof (pw_t)); - /** - * speed - */ + if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL) + { + run_kernel_memset (device_param, device_param->d_pws_amp_buf, 0, kernel_power_max * sizeof (pw_t)); + } - double speed_ms; + // reset timer - hc_timer_get (device_param->timer_speed, speed_ms); + device_param->exec_pos = 0; - hc_timer_set (&device_param->timer_speed); + memset (device_param->exec_ms, 0, EXEC_CACHE * sizeof (double)); - // current speed + memset (device_param->exec_us_prev1, 0, EXPECTED_ITERATIONS * sizeof (double)); + memset (device_param->exec_us_prev2, 0, EXPECTED_ITERATIONS * sizeof (double)); + memset (device_param->exec_us_prev3, 0, EXPECTED_ITERATIONS * sizeof (double)); - //hc_thread_mutex_lock (mux_display); + // store - device_param->speed_cnt[speed_pos] = perf_sum_all; + device_param->kernel_accel = kernel_accel; + device_param->kernel_loops = kernel_loops; - device_param->speed_ms[speed_pos] = speed_ms; + const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel; - //hc_thread_mutex_unlock (mux_display); + device_param->kernel_power = kernel_power; - speed_pos++; + #if defined (DEBUG) - if (speed_pos == SPEED_CACHE) - { - speed_pos = 0; - } + if (data.quiet == 0) + { + clear_prompt (); - /** - * benchmark - */ + log_info ("- Device #%u: autotuned kernel-accel to %u\n" + "- Device #%u: autotuned kernel-loops to %u\n", + device_param->device_id + 1, kernel_accel, + device_param->device_id + 1, kernel_loops); 
- if (data.benchmark == 1) break; - } + send_prompt (); } - device_param->speed_pos = speed_pos; - - myfree (line_buf); + #endif return 0; } + + static void *thread_monitor (void *p) { uint runtime_check = 0; @@ -2946,46 +1288,6 @@ static void bypass () log_info ("Next dictionary / mask in queue selected, bypassing current one"); } -static void stop_at_checkpoint () -{ - if (data.devices_status != STATUS_STOP_AT_CHECKPOINT) - { - if (data.devices_status != STATUS_RUNNING) return; - } - - // this feature only makes sense if --restore-disable was not specified - - if (data.restore_disable == 1) - { - log_info ("WARNING: This feature is disabled when --restore-disable is specified"); - - return; - } - - // check if monitoring of Restore Point updates should be enabled or disabled - - if (data.devices_status != STATUS_STOP_AT_CHECKPOINT) - { - data.devices_status = STATUS_STOP_AT_CHECKPOINT; - - // save the current restore point value - - data.checkpoint_cur_words = get_lowest_words_done (); - - log_info ("Checkpoint enabled: Will quit at next Restore Point update"); - } - else - { - data.devices_status = STATUS_RUNNING; - - // reset the global value for checkpoint checks - - data.checkpoint_cur_words = 0; - - log_info ("Checkpoint disabled: Restore Point updates will no longer be monitored"); - } -} - static void *thread_autotune (void *p) { hc_device_param_t *device_param = (hc_device_param_t *) p; @@ -3531,89 +1833,6 @@ static void *thread_keypress (void *p) return (p); } -static void weak_hash_check (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint salt_pos) -{ - if (!device_param) - { - log_error ("ERROR: %s : Invalid argument", __func__); - - exit (-1); - } - - salt_t *salt_buf = &data.salts_buf[salt_pos]; - - device_param->kernel_params_buf32[27] = salt_pos; - device_param->kernel_params_buf32[30] = 1; - device_param->kernel_params_buf32[31] = salt_buf->digests_cnt; - device_param->kernel_params_buf32[32] = salt_buf->digests_offset; - 
device_param->kernel_params_buf32[33] = 0; - device_param->kernel_params_buf32[34] = 1; - - char *dictfile_old = data.dictfile; - - const char *weak_hash_check = "weak-hash-check"; - - data.dictfile = (char *) weak_hash_check; - - uint cmd0_rule_old = data.kernel_rules_buf[0].cmds[0]; - - data.kernel_rules_buf[0].cmds[0] = 0; - - /** - * run the kernel - */ - - if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) - { - run_kernel (KERN_RUN_1, device_param, 1, false, 0, hashconfig); - } - else - { - run_kernel (KERN_RUN_1, device_param, 1, false, 0, hashconfig); - - uint loop_step = 16; - - const uint iter = salt_buf->salt_iter; - - for (uint loop_pos = 0; loop_pos < iter; loop_pos += loop_step) - { - uint loop_left = iter - loop_pos; - - loop_left = MIN (loop_left, loop_step); - - device_param->kernel_params_buf32[28] = loop_pos; - device_param->kernel_params_buf32[29] = loop_left; - - run_kernel (KERN_RUN_2, device_param, 1, false, 0, hashconfig); - } - - run_kernel (KERN_RUN_3, device_param, 1, false, 0, hashconfig); - } - - /** - * result - */ - - check_cracked (device_param, salt_pos, hashconfig); - - /** - * cleanup - */ - - device_param->kernel_params_buf32[27] = 0; - device_param->kernel_params_buf32[28] = 0; - device_param->kernel_params_buf32[29] = 0; - device_param->kernel_params_buf32[30] = 0; - device_param->kernel_params_buf32[31] = 0; - device_param->kernel_params_buf32[32] = 0; - device_param->kernel_params_buf32[33] = 0; - device_param->kernel_params_buf32[34] = 0; - - data.dictfile = dictfile_old; - - data.kernel_rules_buf[0].cmds[0] = cmd0_rule_old; -} - int main (int argc, char **argv) { #if defined (_WIN) diff --git a/src/hlfmt.c b/src/hlfmt.c index 416f1a6ab..f54771ade 100644 --- a/src/hlfmt.c +++ b/src/hlfmt.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "bitops.h" #include "memory.h" @@ -23,7 +24,6 @@ #include "filehandling.h" #include "rp_cpu.h" 
#include "inc_hash_constants.h" -#include "interface.h" #include "shared.h" #include "hwmon.h" #include "mpsp.h" diff --git a/src/hwmon.c b/src/hwmon.c index 6d57c5c96..5531ac4e8 100644 --- a/src/hwmon.c +++ b/src/hwmon.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "memory.h" #include "logging.h" @@ -15,7 +16,6 @@ #include "ext_nvml.h" #include "ext_xnvctrl.h" #include "hwmon.h" -#include "interface.h" #include "mpsp.h" #include "rp_cpu.h" #include "restore.h" diff --git a/src/interface.c b/src/interface.c index 5c69b3645..820ef7b96 100644 --- a/src/interface.c +++ b/src/interface.c @@ -23,6 +23,26 @@ #include "cpu_sha256.h" #include "interface.h" +static const char OPTI_STR_ZERO_BYTE[] = "Zero-Byte"; +static const char OPTI_STR_PRECOMPUTE_INIT[] = "Precompute-Init"; +static const char OPTI_STR_PRECOMPUTE_MERKLE[] = "Precompute-Merkle-Demgard"; +static const char OPTI_STR_PRECOMPUTE_PERMUT[] = "Precompute-Final-Permutation"; +static const char OPTI_STR_MEET_IN_MIDDLE[] = "Meet-In-The-Middle"; +static const char OPTI_STR_EARLY_SKIP[] = "Early-Skip"; +static const char OPTI_STR_NOT_SALTED[] = "Not-Salted"; +static const char OPTI_STR_NOT_ITERATED[] = "Not-Iterated"; +static const char OPTI_STR_PREPENDED_SALT[] = "Prepended-Salt"; +static const char OPTI_STR_APPENDED_SALT[] = "Appended-Salt"; +static const char OPTI_STR_SINGLE_HASH[] = "Single-Hash"; +static const char OPTI_STR_SINGLE_SALT[] = "Single-Salt"; +static const char OPTI_STR_BRUTE_FORCE[] = "Brute-Force"; +static const char OPTI_STR_RAW_HASH[] = "Raw-Hash"; +static const char OPTI_STR_SLOW_HASH_SIMD[] = "Slow-Hash-SIMD"; +static const char OPTI_STR_USES_BITS_8[] = "Uses-8-Bit"; +static const char OPTI_STR_USES_BITS_16[] = "Uses-16-Bit"; +static const char OPTI_STR_USES_BITS_32[] = "Uses-32-Bit"; +static const char OPTI_STR_USES_BITS_64[] = "Uses-64-Bit"; + static const char PA_000[] = "OK"; static const char PA_001[] = 
"Ignored due to comment"; static const char PA_002[] = "Ignored due to zero length"; @@ -12821,6 +12841,34 @@ int win8phone_parse_hash (char *input_buf, uint input_len, hash_t *hash_buf, con * output */ +char *stroptitype (const uint opti_type) +{ + switch (opti_type) + { + case OPTI_TYPE_ZERO_BYTE: return ((char *) OPTI_STR_ZERO_BYTE); + case OPTI_TYPE_PRECOMPUTE_INIT: return ((char *) OPTI_STR_PRECOMPUTE_INIT); + case OPTI_TYPE_PRECOMPUTE_MERKLE: return ((char *) OPTI_STR_PRECOMPUTE_MERKLE); + case OPTI_TYPE_PRECOMPUTE_PERMUT: return ((char *) OPTI_STR_PRECOMPUTE_PERMUT); + case OPTI_TYPE_MEET_IN_MIDDLE: return ((char *) OPTI_STR_MEET_IN_MIDDLE); + case OPTI_TYPE_EARLY_SKIP: return ((char *) OPTI_STR_EARLY_SKIP); + case OPTI_TYPE_NOT_SALTED: return ((char *) OPTI_STR_NOT_SALTED); + case OPTI_TYPE_NOT_ITERATED: return ((char *) OPTI_STR_NOT_ITERATED); + case OPTI_TYPE_PREPENDED_SALT: return ((char *) OPTI_STR_PREPENDED_SALT); + case OPTI_TYPE_APPENDED_SALT: return ((char *) OPTI_STR_APPENDED_SALT); + case OPTI_TYPE_SINGLE_HASH: return ((char *) OPTI_STR_SINGLE_HASH); + case OPTI_TYPE_SINGLE_SALT: return ((char *) OPTI_STR_SINGLE_SALT); + case OPTI_TYPE_BRUTE_FORCE: return ((char *) OPTI_STR_BRUTE_FORCE); + case OPTI_TYPE_RAW_HASH: return ((char *) OPTI_STR_RAW_HASH); + case OPTI_TYPE_SLOW_HASH_SIMD: return ((char *) OPTI_STR_SLOW_HASH_SIMD); + case OPTI_TYPE_USES_BITS_8: return ((char *) OPTI_STR_USES_BITS_8); + case OPTI_TYPE_USES_BITS_16: return ((char *) OPTI_STR_USES_BITS_16); + case OPTI_TYPE_USES_BITS_32: return ((char *) OPTI_STR_USES_BITS_32); + case OPTI_TYPE_USES_BITS_64: return ((char *) OPTI_STR_USES_BITS_64); + } + + return (NULL); +} + char *strhashtype (const uint hash_mode) { switch (hash_mode) diff --git a/src/kernel.c b/src/kernel.c deleted file mode 100644 index 4509c3bed..000000000 --- a/src/kernel.c +++ /dev/null @@ -1,7 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#include "common.h" -#include "kernel.h" 
diff --git a/src/logfile.c b/src/logfile.c index 0bdb47ac7..e8a182380 100644 --- a/src/logfile.c +++ b/src/logfile.c @@ -10,6 +10,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "ext_OpenCL.h" #include "ext_ADL.h" @@ -20,7 +21,6 @@ #include "memory.h" #include "hwmon.h" #include "rp_cpu.h" -#include "interface.h" #include "mpsp.h" #include "opencl.h" #include "restore.h" diff --git a/src/mpsp.c b/src/mpsp.c index c737b4f07..2ae34ab9a 100644 --- a/src/mpsp.c +++ b/src/mpsp.c @@ -10,6 +10,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "memory.h" #include "logging.h" @@ -22,7 +23,6 @@ #include "filehandling.h" #include "thread.h" #include "hwmon.h" -#include "interface.h" #include "mpsp.h" #include "rp_cpu.h" #include "opencl.h" diff --git a/src/opencl.c b/src/opencl.c index 58b5e3717..da42c0509 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -6,6 +6,8 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" +#include "convert.h" #include "memory.h" #include "logging.h" #include "locking.h" @@ -19,17 +21,25 @@ #include "opencl.h" #include "shared.h" #include "hwmon.h" -#include "interface.h" #include "mpsp.h" +#include "status.h" +#include "stdout.h" #include "restore.h" #include "outfile.h" #include "potfile.h" #include "debugfile.h" #include "loopback.h" +#include "thread.h" +#include "dictstat.h" +#include "wordlist.h" +#include "filehandling.h" +#include "hash_management.h" #include "data.h" extern hc_global_data_t data; +extern hc_thread_mutex_t mux_counter; + uint setup_opencl_platforms_filter (char *opencl_platforms) { uint opencl_platforms_filter = 0; @@ -196,43 +206,1097 @@ void writeProgramBin (char *dst, u8 *binary, size_t binary_size) } } -double get_avg_exec_time (hc_device_param_t *device_param, const int last_num_entries) +int gidd_to_pw_t (hc_device_param_t *device_param, const 
u64 gidd, pw_t *pw) { - int exec_pos = (int) device_param->exec_pos - last_num_entries; + cl_int CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, gidd * sizeof (pw_t), sizeof (pw_t), pw, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); - if (exec_pos < 0) exec_pos += EXEC_CACHE; + return -1; + } - double exec_ms_sum = 0; + return 0; +} - int exec_ms_cnt = 0; +int choose_kernel (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint attack_exec, const uint attack_mode, const uint opts_type, const salt_t *salt_buf, const uint highest_pw_len, const uint pws_cnt, const uint fast_iteration) +{ + cl_int CL_err = CL_SUCCESS; - for (int i = 0; i < last_num_entries; i++) + if (hashconfig->hash_mode == 2000) { - double exec_ms = device_param->exec_ms[(exec_pos + i) % EXEC_CACHE]; + process_stdout (device_param, pws_cnt); + + return 0; + } - if (exec_ms > 0) + if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (attack_mode == ATTACK_MODE_BF) { - exec_ms_sum += exec_ms; + if (opts_type & OPTS_TYPE_PT_BITSLICE) + { + const uint size_tm = 32 * sizeof (bs_word_t); + + run_kernel_bzero (device_param, device_param->d_tm_c, size_tm); + + run_kernel_tm (device_param); + + CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + } - exec_ms_cnt++; + if (highest_pw_len < 16) + { + run_kernel (KERN_RUN_1, device_param, pws_cnt, true, fast_iteration, hashconfig); + } + else if (highest_pw_len < 32) + { + run_kernel (KERN_RUN_2, device_param, pws_cnt, true, fast_iteration, hashconfig); + } + else + { + run_kernel (KERN_RUN_3, device_param, pws_cnt, true, fast_iteration, hashconfig); } } + else + { + run_kernel_amp 
(device_param, pws_cnt); + + run_kernel (KERN_RUN_1, device_param, pws_cnt, false, 0, hashconfig); + + if (opts_type & OPTS_TYPE_HOOK12) + { + run_kernel (KERN_RUN_12, device_param, pws_cnt, false, 0, hashconfig); + + CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + // do something with data + + CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + + uint iter = salt_buf->salt_iter; + + uint loop_step = device_param->kernel_loops; + + for (uint loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++) + { + uint loop_left = iter - loop_pos; + + loop_left = MIN (loop_left, loop_step); + + device_param->kernel_params_buf32[28] = loop_pos; + device_param->kernel_params_buf32[29] = loop_left; + + run_kernel (KERN_RUN_2, device_param, pws_cnt, true, slow_iteration, hashconfig); + + if (data.devices_status == STATUS_CRACKED) break; + if (data.devices_status == STATUS_ABORTED) break; + if (data.devices_status == STATUS_QUIT) break; + if (data.devices_status == STATUS_BYPASS) break; + + /** + * speed + */ + + const float iter_part = (float) (loop_pos + loop_left) / iter; + + const u64 perf_sum_all = (u64) (pws_cnt * iter_part); + + double speed_ms; + + hc_timer_get (device_param->timer_speed, speed_ms); + + const u32 speed_pos = device_param->speed_pos; + + device_param->speed_cnt[speed_pos] = perf_sum_all; + + device_param->speed_ms[speed_pos] = speed_ms; + + if (data.benchmark == 1) + { + if (speed_ms > 4096) data.devices_status = 
STATUS_ABORTED; + } + } + + if (opts_type & OPTS_TYPE_HOOK23) + { + run_kernel (KERN_RUN_23, device_param, pws_cnt, false, 0, hashconfig); + + CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + // do something with data + + CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } - if (exec_ms_cnt == 0) return 0; + run_kernel (KERN_RUN_3, device_param, pws_cnt, false, 0, hashconfig); + } - return exec_ms_sum / exec_ms_cnt; + return 0; } -int gidd_to_pw_t (hc_device_param_t *device_param, const u64 gidd, pw_t *pw) +int run_kernel (const uint kern_run, hc_device_param_t *device_param, const uint num, const uint event_update, const uint iteration, hashconfig_t *hashconfig) { - cl_int CL_err = hc_clEnqueueReadBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, gidd * sizeof (pw_t), sizeof (pw_t), pw, 0, NULL, NULL); + cl_int CL_err = CL_SUCCESS; + + uint num_elements = num; + + device_param->kernel_params_buf32[33] = data.combs_mode; + device_param->kernel_params_buf32[34] = num; + + uint kernel_threads = device_param->kernel_threads; + + while (num_elements % kernel_threads) num_elements++; + + cl_kernel kernel = NULL; + + switch (kern_run) + { + case KERN_RUN_1: kernel = device_param->kernel1; break; + case KERN_RUN_12: kernel = device_param->kernel12; break; + case KERN_RUN_2: kernel = device_param->kernel2; break; + case KERN_RUN_23: kernel = device_param->kernel23; break; + case KERN_RUN_3: kernel = device_param->kernel3; break; + } + + 
CL_err |= hc_clSetKernelArg (data.ocl, kernel, 24, sizeof (cl_uint), device_param->kernel_params[24]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 25, sizeof (cl_uint), device_param->kernel_params[25]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 26, sizeof (cl_uint), device_param->kernel_params[26]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 27, sizeof (cl_uint), device_param->kernel_params[27]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 28, sizeof (cl_uint), device_param->kernel_params[28]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 29, sizeof (cl_uint), device_param->kernel_params[29]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 30, sizeof (cl_uint), device_param->kernel_params[30]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 31, sizeof (cl_uint), device_param->kernel_params[31]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 32, sizeof (cl_uint), device_param->kernel_params[32]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 33, sizeof (cl_uint), device_param->kernel_params[33]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 34, sizeof (cl_uint), device_param->kernel_params[34]); if (CL_err != CL_SUCCESS) { - log_error ("ERROR: clEnqueueReadBuffer(): %s\n", val2cstr_cl (CL_err)); + log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + cl_event event; + + if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (data.attack_mode == ATTACK_MODE_BF)) + { + const size_t global_work_size[3] = { num_elements, 32, 1 }; + const size_t local_work_size[3] = { kernel_threads / 32, 32, 1 }; + + CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + else + { + if (kern_run == KERN_RUN_2) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD) + { + num_elements = 
CEIL (num_elements / device_param->vector_width); + } + } + + while (num_elements % kernel_threads) num_elements++; + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &event); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + + CL_err = hc_clFlush (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + if (device_param->nvidia_spin_damp > 0) + { + if (data.devices_status == STATUS_RUNNING) + { + if (iteration < EXPECTED_ITERATIONS) + { + switch (kern_run) + { + case KERN_RUN_1: if (device_param->exec_us_prev1[iteration] > 0) usleep ((useconds_t)(device_param->exec_us_prev1[iteration] * device_param->nvidia_spin_damp)); break; + case KERN_RUN_2: if (device_param->exec_us_prev2[iteration] > 0) usleep ((useconds_t)(device_param->exec_us_prev2[iteration] * device_param->nvidia_spin_damp)); break; + case KERN_RUN_3: if (device_param->exec_us_prev3[iteration] > 0) usleep ((useconds_t)(device_param->exec_us_prev3[iteration] * device_param->nvidia_spin_damp)); break; + } + } + } + } + + CL_err = hc_clWaitForEvents (data.ocl, 1, &event); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clWaitForEvents(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + cl_ulong time_start; + cl_ulong time_end; + + CL_err |= hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_START, sizeof (time_start), &time_start, NULL); + CL_err |= hc_clGetEventProfilingInfo (data.ocl, event, CL_PROFILING_COMMAND_END, sizeof (time_end), &time_end, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clGetEventProfilingInfo(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + 
+ const double exec_us = (double) (time_end - time_start) / 1000; + + if (data.devices_status == STATUS_RUNNING) + { + if (iteration < EXPECTED_ITERATIONS) + { + switch (kern_run) + { + case KERN_RUN_1: device_param->exec_us_prev1[iteration] = exec_us; break; + case KERN_RUN_2: device_param->exec_us_prev2[iteration] = exec_us; break; + case KERN_RUN_3: device_param->exec_us_prev3[iteration] = exec_us; break; + } + } + } + + if (event_update) + { + uint exec_pos = device_param->exec_pos; + + device_param->exec_ms[exec_pos] = exec_us / 1000; + + exec_pos++; + + if (exec_pos == EXEC_CACHE) + { + exec_pos = 0; + } + + device_param->exec_pos = exec_pos; + } + + CL_err = hc_clReleaseEvent (data.ocl, event); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clReleaseEvent(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFinish (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + return 0; +} + +int run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num) +{ + cl_int CL_err = CL_SUCCESS; + + uint num_elements = num; + + switch (kern_run) + { + case KERN_RUN_MP: device_param->kernel_params_mp_buf32[8] = num; break; + case KERN_RUN_MP_R: device_param->kernel_params_mp_r_buf32[8] = num; break; + case KERN_RUN_MP_L: device_param->kernel_params_mp_l_buf32[9] = num; break; + } + + // causes problems with special threads like in bcrypt + // const uint kernel_threads = device_param->kernel_threads; + + uint kernel_threads = device_param->kernel_threads; + + while (num_elements % kernel_threads) num_elements++; + + cl_kernel kernel = NULL; + + switch (kern_run) + { + case KERN_RUN_MP: kernel = device_param->kernel_mp; break; + case KERN_RUN_MP_R: kernel = device_param->kernel_mp_r; break; + case KERN_RUN_MP_L: kernel = device_param->kernel_mp_l; break; + } + + switch (kern_run) + { + case KERN_RUN_MP: CL_err |= 
hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp[4]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp[5]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp[6]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp[7]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint), device_param->kernel_params_mp[8]); + break; + case KERN_RUN_MP_R: CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_r[3]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_r[4]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_r[5]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_r[6]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp_r[7]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint), device_param->kernel_params_mp_r[8]); + break; + case KERN_RUN_MP_L: CL_err |= hc_clSetKernelArg (data.ocl, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp_l[3]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 4, sizeof (cl_uint), device_param->kernel_params_mp_l[4]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_mp_l[5]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_mp_l[6]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 7, sizeof (cl_uint), device_param->kernel_params_mp_l[7]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 8, sizeof (cl_uint), device_param->kernel_params_mp_l[8]); + CL_err |= 
hc_clSetKernelArg (data.ocl, kernel, 9, sizeof (cl_uint), device_param->kernel_params_mp_l[9]); + break; + } + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFlush (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFinish (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + return 0; +} + +int run_kernel_tm (hc_device_param_t *device_param) +{ + cl_int CL_err = CL_SUCCESS; + + const uint num_elements = 1024; // fixed + + uint kernel_threads = 32; + + cl_kernel kernel = device_param->kernel_tm; + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFlush (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); return -1; } + CL_err = hc_clFinish (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + return 0; +} + 
+int run_kernel_amp (hc_device_param_t *device_param, const uint num) +{ + cl_int CL_err = CL_SUCCESS; + + uint num_elements = num; + + device_param->kernel_params_amp_buf32[5] = data.combs_mode; + device_param->kernel_params_amp_buf32[6] = num_elements; + + // causes problems with special threads like in bcrypt + // const uint kernel_threads = device_param->kernel_threads; + + uint kernel_threads = device_param->kernel_threads; + + while (num_elements % kernel_threads) num_elements++; + + cl_kernel kernel = device_param->kernel_amp; + + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 5, sizeof (cl_uint), device_param->kernel_params_amp[5]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 6, sizeof (cl_uint), device_param->kernel_params_amp[6]); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFlush (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFinish (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + return 0; +} + +int run_kernel_memset (hc_device_param_t *device_param, cl_mem buf, const uint value, const uint num) +{ + cl_int CL_err = CL_SUCCESS; + + const u32 num16d = num / 16; + const u32 num16m = num % 16; + + if (num16d) + { + device_param->kernel_params_memset_buf32[1] = value; + device_param->kernel_params_memset_buf32[2] = num16d; + + 
uint kernel_threads = device_param->kernel_threads; + + uint num_elements = num16d; + + while (num_elements % kernel_threads) num_elements++; + + cl_kernel kernel = device_param->kernel_memset; + + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 0, sizeof (cl_mem), (void *) &buf); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); + CL_err |= hc_clSetKernelArg (data.ocl, kernel, 2, sizeof (cl_uint), device_param->kernel_params_memset[2]); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clSetKernelArg(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + const size_t global_work_size[3] = { num_elements, 1, 1 }; + const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + CL_err = hc_clEnqueueNDRangeKernel (data.ocl, device_param->command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueNDRangeKernel(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFlush (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFlush(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + + CL_err = hc_clFinish (data.ocl, device_param->command_queue); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clFinish(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + + if (num16m) + { + u32 tmp[4]; + + tmp[0] = value; + tmp[1] = value; + tmp[2] = value; + tmp[3] = value; + + CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, buf, CL_TRUE, num16d * 16, num16m, tmp, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + + return 0; +} + +int run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const size_t size) +{ + return run_kernel_memset (device_param, buf, 0, size); +} + +int run_copy (hc_device_param_t *device_param, hashconfig_t 
*hashconfig, const uint pws_cnt) +{ + cl_int CL_err = CL_SUCCESS; + + if (data.attack_kern == ATTACK_KERN_STRAIGHT) + { + CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + else if (data.attack_kern == ATTACK_KERN_COMBI) + { + if (data.attack_mode == ATTACK_MODE_COMBI) + { + if (data.combs_mode == COMBINATOR_MODE_BASE_RIGHT) + { + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) + { + for (u32 i = 0; i < pws_cnt; i++) + { + const u32 pw_len = device_param->pws_buf[i].pw_len; + + u8 *ptr = (u8 *) device_param->pws_buf[i].i; + + ptr[pw_len] = 0x01; + } + } + else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) + { + for (u32 i = 0; i < pws_cnt; i++) + { + const u32 pw_len = device_param->pws_buf[i].pw_len; + + u8 *ptr = (u8 *) device_param->pws_buf[i].i; + + ptr[pw_len] = 0x80; + } + } + } + } + else if (data.attack_mode == ATTACK_MODE_HYBRID2) + { + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) + { + for (u32 i = 0; i < pws_cnt; i++) + { + const u32 pw_len = device_param->pws_buf[i].pw_len; + + u8 *ptr = (u8 *) device_param->pws_buf[i].i; + + ptr[pw_len] = 0x01; + } + } + else if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) + { + for (u32 i = 0; i < pws_cnt; i++) + { + const u32 pw_len = device_param->pws_buf[i].pw_len; + + u8 *ptr = (u8 *) device_param->pws_buf[i].i; + + ptr[pw_len] = 0x80; + } + } + } + + CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + else if (data.attack_kern == ATTACK_KERN_BF) + { + const u64 off = device_param->words_off; + + 
device_param->kernel_params_mp_l_buf64[3] = off; + + run_kernel_mp (KERN_RUN_MP_L, device_param, pws_cnt); + } + + return 0; +} + +int run_cracker (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint pws_cnt) +{ + char *line_buf = (char *) mymalloc (HCBUFSIZ_LARGE); + + // init speed timer + + uint speed_pos = device_param->speed_pos; + + #if defined (_POSIX) + if (device_param->timer_speed.tv_sec == 0) + { + hc_timer_set (&device_param->timer_speed); + } + #endif + + #if defined (_WIN) + if (device_param->timer_speed.QuadPart == 0) + { + hc_timer_set (&device_param->timer_speed); + } + #endif + + // find highest password length, this is for optimization stuff + + uint highest_pw_len = 0; + + if (data.attack_kern == ATTACK_KERN_STRAIGHT) + { + } + else if (data.attack_kern == ATTACK_KERN_COMBI) + { + } + else if (data.attack_kern == ATTACK_KERN_BF) + { + highest_pw_len = device_param->kernel_params_mp_l_buf32[4] + + device_param->kernel_params_mp_l_buf32[5]; + } + + // iteration type + + uint innerloop_step = 0; + uint innerloop_cnt = 0; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) innerloop_step = device_param->kernel_loops; + else innerloop_step = 1; + + if (data.attack_kern == ATTACK_KERN_STRAIGHT) innerloop_cnt = data.kernel_rules_cnt; + else if (data.attack_kern == ATTACK_KERN_COMBI) innerloop_cnt = data.combs_cnt; + else if (data.attack_kern == ATTACK_KERN_BF) innerloop_cnt = data.bfs_cnt; + + // loop start: most outer loop = salt iteration, then innerloops (if multi) + + for (uint salt_pos = 0; salt_pos < data.salts_cnt; salt_pos++) + { + while (data.devices_status == STATUS_PAUSED) hc_sleep (1); + + if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint (); + + if (data.devices_status == STATUS_CRACKED) break; + if (data.devices_status == STATUS_ABORTED) break; + if (data.devices_status == STATUS_QUIT) break; + if (data.devices_status == STATUS_BYPASS) break; + + salt_t *salt_buf = &data.salts_buf[salt_pos]; 
+ + device_param->kernel_params_buf32[27] = salt_pos; + device_param->kernel_params_buf32[31] = salt_buf->digests_cnt; + device_param->kernel_params_buf32[32] = salt_buf->digests_offset; + + FILE *combs_fp = device_param->combs_fp; + + if (data.attack_mode == ATTACK_MODE_COMBI) + { + rewind (combs_fp); + } + + // innerloops + + for (uint innerloop_pos = 0; innerloop_pos < innerloop_cnt; innerloop_pos += innerloop_step) + { + while (data.devices_status == STATUS_PAUSED) hc_sleep (1); + + if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint (); + + if (data.devices_status == STATUS_CRACKED) break; + if (data.devices_status == STATUS_ABORTED) break; + if (data.devices_status == STATUS_QUIT) break; + if (data.devices_status == STATUS_BYPASS) break; + + uint fast_iteration = 0; + + uint innerloop_left = innerloop_cnt - innerloop_pos; + + if (innerloop_left > innerloop_step) + { + innerloop_left = innerloop_step; + + fast_iteration = 1; + } + + device_param->innerloop_pos = innerloop_pos; + device_param->innerloop_left = innerloop_left; + + device_param->kernel_params_buf32[30] = innerloop_left; + + // i think we can get rid of this + if (innerloop_left == 0) + { + puts ("bug, how should this happen????\n"); + + continue; + } + + if (data.salts_shown[salt_pos] == 1) + { + data.words_progress_done[salt_pos] += (u64) pws_cnt * (u64) innerloop_left; + + continue; + } + + // initialize amplifiers + + if (data.attack_mode == ATTACK_MODE_COMBI) + { + uint i = 0; + + while (i < innerloop_left) + { + if (feof (combs_fp)) break; + + int line_len = fgetl (combs_fp, line_buf); + + if (line_len >= PW_MAX1) continue; + + line_len = convert_from_hex (line_buf, line_len); + + char *line_buf_new = line_buf; + + if (run_rule_engine (data.rule_len_r, data.rule_buf_r)) + { + char rule_buf_out[BLOCK_SIZE] = { 0 }; + + int rule_len_out = _old_apply_rule (data.rule_buf_r, data.rule_len_r, line_buf, line_len, rule_buf_out); + + if (rule_len_out < 0) + { + 
data.words_progress_rejected[salt_pos] += pws_cnt; + + continue; + } + + line_len = rule_len_out; + + line_buf_new = rule_buf_out; + } + + line_len = MIN (line_len, PW_DICTMAX); + + u8 *ptr = (u8 *) device_param->combs_buf[i].i; + + memcpy (ptr, line_buf_new, line_len); + + memset (ptr + line_len, 0, PW_DICTMAX1 - line_len); + + if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER) + { + uppercase (ptr, line_len); + } + + if (data.combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) + { + ptr[line_len] = 0x80; + } + + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) + { + ptr[line_len] = 0x01; + } + } + + device_param->combs_buf[i].pw_len = line_len; + + i++; + } + + for (uint j = i; j < innerloop_left; j++) + { + device_param->combs_buf[j].i[0] = 0; + device_param->combs_buf[j].i[1] = 0; + device_param->combs_buf[j].i[2] = 0; + device_param->combs_buf[j].i[3] = 0; + device_param->combs_buf[j].i[4] = 0; + device_param->combs_buf[j].i[5] = 0; + device_param->combs_buf[j].i[6] = 0; + device_param->combs_buf[j].i[7] = 0; + + device_param->combs_buf[j].pw_len = 0; + } + + innerloop_left = i; + } + else if (data.attack_mode == ATTACK_MODE_BF) + { + u64 off = innerloop_pos; + + device_param->kernel_params_mp_r_buf64[3] = off; + + run_kernel_mp (KERN_RUN_MP_R, device_param, innerloop_left); + } + else if (data.attack_mode == ATTACK_MODE_HYBRID1) + { + u64 off = innerloop_pos; + + device_param->kernel_params_mp_buf64[3] = off; + + run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left); + } + else if (data.attack_mode == ATTACK_MODE_HYBRID2) + { + u64 off = innerloop_pos; + + device_param->kernel_params_mp_buf64[3] = off; + + run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left); + } + + // copy amplifiers + + if (data.attack_mode == ATTACK_MODE_STRAIGHT) + { + cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_rules, device_param->d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, 
innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + else if (data.attack_mode == ATTACK_MODE_COMBI) + { + cl_int CL_err = hc_clEnqueueWriteBuffer (data.ocl, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (comb_t), device_param->combs_buf, 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueWriteBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + else if (data.attack_mode == ATTACK_MODE_BF) + { + cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_bfs, device_param->d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + else if (data.attack_mode == ATTACK_MODE_HYBRID1) + { + cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + else if (data.attack_mode == ATTACK_MODE_HYBRID2) + { + cl_int CL_err = hc_clEnqueueCopyBuffer (data.ocl, device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL); + + if (CL_err != CL_SUCCESS) + { + log_error ("ERROR: clEnqueueCopyBuffer(): %s\n", val2cstr_cl (CL_err)); + + return -1; + } + } + + if (data.benchmark == 1) + { + hc_timer_set (&device_param->timer_speed); + } + + int rc = choose_kernel (device_param, hashconfig, hashconfig->attack_exec, data.attack_mode, hashconfig->opts_type, salt_buf, highest_pw_len, pws_cnt, fast_iteration); + + if (rc == -1) return -1; + + if (data.devices_status == 
STATUS_STOP_AT_CHECKPOINT) check_checkpoint (); + + if (data.devices_status == STATUS_CRACKED) break; + if (data.devices_status == STATUS_ABORTED) break; + if (data.devices_status == STATUS_QUIT) break; + if (data.devices_status == STATUS_BYPASS) break; + + /** + * result + */ + + if (data.benchmark == 0) + { + check_cracked (device_param, salt_pos, hashconfig); + } + + /** + * progress + */ + + u64 perf_sum_all = (u64) pws_cnt * (u64) innerloop_left; + + hc_thread_mutex_lock (mux_counter); + + data.words_progress_done[salt_pos] += perf_sum_all; + + hc_thread_mutex_unlock (mux_counter); + + /** + * speed + */ + + double speed_ms; + + hc_timer_get (device_param->timer_speed, speed_ms); + + hc_timer_set (&device_param->timer_speed); + + // current speed + + //hc_thread_mutex_lock (mux_display); + + device_param->speed_cnt[speed_pos] = perf_sum_all; + + device_param->speed_ms[speed_pos] = speed_ms; + + //hc_thread_mutex_unlock (mux_display); + + speed_pos++; + + if (speed_pos == SPEED_CACHE) + { + speed_pos = 0; + } + + /** + * benchmark + */ + + if (data.benchmark == 1) break; + } + } + + device_param->speed_pos = speed_pos; + + myfree (line_buf); + return 0; } diff --git a/src/restore.c b/src/restore.c index 82ebb94b7..4a8809476 100644 --- a/src/restore.c +++ b/src/restore.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "memory.h" #include "logging.h" @@ -17,7 +18,6 @@ #include "thread.h" #include "rp_cpu.h" #include "hwmon.h" -#include "interface.h" #include "mpsp.h" #include "opencl.h" #include "restore.h" @@ -25,6 +25,7 @@ #include "potfile.h" #include "debugfile.h" #include "loopback.h" +#include "status.h" #include "data.h" extern hc_global_data_t data; @@ -300,3 +301,55 @@ void cycle_restore () log_info ("WARN: Rename file '%s' to '%s': %s", new_restore_file, eff_restore_file, strerror (errno)); } } + +void check_checkpoint () +{ + // if (data.restore_disable == 1) break; (this 
is already implied by previous checks) + + u64 words_cur = get_lowest_words_done (); + + if (words_cur != data.checkpoint_cur_words) + { + myabort (); + } +} + +void stop_at_checkpoint () +{ + if (data.devices_status != STATUS_STOP_AT_CHECKPOINT) + { + if (data.devices_status != STATUS_RUNNING) return; + } + + // this feature only makes sense if --restore-disable was not specified + + if (data.restore_disable == 1) + { + log_info ("WARNING: This feature is disabled when --restore-disable is specified"); + + return; + } + + // check if monitoring of Restore Point updates should be enabled or disabled + + if (data.devices_status != STATUS_STOP_AT_CHECKPOINT) + { + data.devices_status = STATUS_STOP_AT_CHECKPOINT; + + // save the current restore point value + + data.checkpoint_cur_words = get_lowest_words_done (); + + log_info ("Checkpoint enabled: Will quit at next Restore Point update"); + } + else + { + data.devices_status = STATUS_RUNNING; + + // reset the global value for checkpoint checks + + data.checkpoint_cur_words = 0; + + log_info ("Checkpoint disabled: Restore Point updates will no longer be monitored"); + } +} diff --git a/src/rp_cpu.c b/src/rp_cpu.c index b4ad0ed76..84c5929a2 100644 --- a/src/rp_cpu.c +++ b/src/rp_cpu.c @@ -10,6 +10,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "memory.h" #include "logging.h" @@ -23,7 +24,6 @@ #include "rp_cpu.h" #include "terminal.h" #include "hwmon.h" -#include "interface.h" #include "mpsp.h" #include "rp_cpu.h" #include "opencl.h" diff --git a/src/status.c b/src/status.c index c7598f6d9..e5a3c0fd2 100644 --- a/src/status.c +++ b/src/status.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "memory.h" #include "logging.h" @@ -18,11 +19,9 @@ #include "rp_cpu.h" #include "terminal.h" #include "hwmon.h" -#include "interface.h" #include "mpsp.h" #include "opencl.h" 
#include "restore.h" -#include "interface.h" #include "outfile.h" #include "potfile.h" #include "debugfile.h" @@ -47,6 +46,7 @@ extern hc_global_data_t data; extern hc_thread_mutex_t mux_hwmon; hc_thread_mutex_t mux_display; +hc_thread_mutex_t mux_counter; static void format_timer_display (struct tm *tm, char *buf, size_t len) { @@ -143,6 +143,33 @@ static char *strstatus (const uint devices_status) return ((char *) "Unknown"); } +double get_avg_exec_time (hc_device_param_t *device_param, const int last_num_entries) +{ + int exec_pos = (int) device_param->exec_pos - last_num_entries; + + if (exec_pos < 0) exec_pos += EXEC_CACHE; + + double exec_ms_sum = 0; + + int exec_ms_cnt = 0; + + for (int i = 0; i < last_num_entries; i++) + { + double exec_ms = device_param->exec_ms[(exec_pos + i) % EXEC_CACHE]; + + if (exec_ms > 0) + { + exec_ms_sum += exec_ms; + + exec_ms_cnt++; + } + } + + if (exec_ms_cnt == 0) return 0; + + return exec_ms_sum / exec_ms_cnt; +} + void status_display_machine_readable () { FILE *out = stdout; diff --git a/src/stdout.c b/src/stdout.c index 18ad5c2db..87c240f57 100644 --- a/src/stdout.c +++ b/src/stdout.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "memory.h" #include "convert.h" @@ -21,10 +22,8 @@ #include "locking.h" #include "rp_cpu.h" #include "rp_kernel_on_cpu.h" -#include "interface.h" #include "shared.h" #include "hwmon.h" -#include "interface.h" #include "mpsp.h" #include "restore.h" #include "outfile.h" diff --git a/src/thread.c b/src/thread.c index 244fbfc5d..af83e0f65 100644 --- a/src/thread.c +++ b/src/thread.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "ext_OpenCL.h" #include "ext_ADL.h" @@ -16,7 +17,6 @@ #include "thread.h" #include "rp_cpu.h" #include "terminal.h" -#include "interface.h" #include "hwmon.h" #include "mpsp.h" #include "restore.h" diff --git 
a/src/tuningdb.c b/src/tuningdb.c index 8811359f3..6d035f6a9 100644 --- a/src/tuningdb.c +++ b/src/tuningdb.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "logging.h" #include "memory.h" diff --git a/src/weak_hash.c b/src/weak_hash.c index fccf35681..40fda5adf 100644 --- a/src/weak_hash.c +++ b/src/weak_hash.c @@ -4,4 +4,110 @@ */ #include "common.h" +#include "types_int.h" +#include "types.h" +#include "interface.h" +#include "timer.h" +#include "logging.h" +#include "ext_OpenCL.h" +#include "ext_ADL.h" +#include "ext_nvapi.h" +#include "ext_nvml.h" +#include "ext_xnvctrl.h" +#include "hwmon.h" +#include "mpsp.h" +#include "rp_cpu.h" +#include "restore.h" +#include "opencl.h" +#include "outfile.h" +#include "potfile.h" +#include "debugfile.h" +#include "loopback.h" +#include "data.h" +#include "hash_management.h" #include "weak_hash.h" + +extern hc_global_data_t data; + +void weak_hash_check (hc_device_param_t *device_param, hashconfig_t *hashconfig, const uint salt_pos) +{ + if (device_param == NULL) + { + log_error ("ERROR: %s : Invalid argument", __func__); + + exit (-1); + } + + salt_t *salt_buf = &data.salts_buf[salt_pos]; + + device_param->kernel_params_buf32[27] = salt_pos; + device_param->kernel_params_buf32[30] = 1; + device_param->kernel_params_buf32[31] = salt_buf->digests_cnt; + device_param->kernel_params_buf32[32] = salt_buf->digests_offset; + device_param->kernel_params_buf32[33] = 0; + device_param->kernel_params_buf32[34] = 1; + + char *dictfile_old = data.dictfile; + + const char *weak_hash_check = "weak-hash-check"; + + data.dictfile = (char *) weak_hash_check; + + uint cmd0_rule_old = data.kernel_rules_buf[0].cmds[0]; + + data.kernel_rules_buf[0].cmds[0] = 0; + + /** + * run the kernel + */ + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + run_kernel (KERN_RUN_1, device_param, 1, false, 0, hashconfig); + } + else + { + run_kernel (KERN_RUN_1, 
device_param, 1, false, 0, hashconfig); + + uint loop_step = 16; + + const uint iter = salt_buf->salt_iter; + + for (uint loop_pos = 0; loop_pos < iter; loop_pos += loop_step) + { + uint loop_left = iter - loop_pos; + + loop_left = MIN (loop_left, loop_step); + + device_param->kernel_params_buf32[28] = loop_pos; + device_param->kernel_params_buf32[29] = loop_left; + + run_kernel (KERN_RUN_2, device_param, 1, false, 0, hashconfig); + } + + run_kernel (KERN_RUN_3, device_param, 1, false, 0, hashconfig); + } + + /** + * result + */ + + check_cracked (device_param, salt_pos, hashconfig); + + /** + * cleanup + */ + + device_param->kernel_params_buf32[27] = 0; + device_param->kernel_params_buf32[28] = 0; + device_param->kernel_params_buf32[29] = 0; + device_param->kernel_params_buf32[30] = 0; + device_param->kernel_params_buf32[31] = 0; + device_param->kernel_params_buf32[32] = 0; + device_param->kernel_params_buf32[33] = 0; + device_param->kernel_params_buf32[34] = 0; + + data.dictfile = dictfile_old; + + data.kernel_rules_buf[0].cmds[0] = cmd0_rule_old; +} diff --git a/src/wordlist.c b/src/wordlist.c index 0cd8f4b52..2c02bea78 100644 --- a/src/wordlist.c +++ b/src/wordlist.c @@ -6,6 +6,7 @@ #include "common.h" #include "types_int.h" #include "types.h" +#include "interface.h" #include "timer.h" #include "memory.h" #include "convert.h" @@ -21,7 +22,6 @@ #include "locking.h" #include "rp_cpu.h" #include "rp_kernel_on_cpu.h" -#include "interface.h" #include "shared.h" #include "hwmon.h" #include "thread.h"