1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-07-08 07:38:17 +00:00

Fix OpenCL spawning unnecessary work-item due to redundant multiplication in new OPTS_TYPE_THREAD_MULTI_DISABLE mode.

Prepare Metal section in run_kernel() for 2D kernel invocation related to new salt->salt_dimy variable.
Move reusable Argon2 module code into separate file argon2_commit.c, similar to scrypt_commit.c, and update headers.
Update existing hash mode 34000 to use argon2_commit.c.
This commit is contained in:
Jens Steube 2025-07-03 08:10:30 +02:00
parent 2e185b5450
commit e8cf8bd146
3 changed files with 221 additions and 171 deletions

View File

@ -2860,7 +2860,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
if (kernel_threads == 0) kernel_threads = 1;
num_elements = round_up_multiple_32 (num_elements, kernel_threads);
if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
{
num_elements = round_up_multiple_32 (num_elements, kernel_threads);
}
if (kern_run == KERN_RUN_1)
{
@ -2898,10 +2901,37 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
}
}
num_elements = round_up_multiple_32 (num_elements, kernel_threads);
if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
{
num_elements = round_up_multiple_32 (num_elements, kernel_threads);
}
else
{
num_elements = num_elements * kernel_threads;
}
const size_t global_work_size[3] = { num_elements, 1, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
size_t global_work_size[3] = { num_elements, 1, 1 };
size_t local_work_size[3] = { kernel_threads, 1, 1 };
cl_uint work_dim = 1;
if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
{
global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
work_dim = 2;
}
if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
{
global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
work_dim = 2;
}
if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
{
global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
work_dim = 2;
}
double ms = 0;
@ -2997,7 +3027,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
}
*/
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
{
num_elements = round_up_multiple_64 (num_elements, kernel_threads);
}
cl_event opencl_event;

174
src/modules/argon2_common.c Normal file
View File

@ -0,0 +1,174 @@
/**
* Author......: Netherlands Forensic Institute
* License.....: MIT
*/
#include <inttypes.h>
#include "common.h"
#include "types.h"
#include "modules.h"
#include "bitops.h"
#include "convert.h"
#include "shared.h"
#include "memory.h"
#define ARGON2_SYNC_POINTS 4
#define ARGON2_BLOCK_SIZE 1024
typedef struct argon2_tmp
{
u32 state[4]; // just something for now
} argon2_tmp_t;
typedef struct argon2_options
{
u32 type;
u32 version;
u32 iterations;
u32 parallelism;
u32 memory_usage_in_kib;
u32 segment_length;
u32 lane_length;
u32 memory_block_count;
u32 digest_len;
} argon2_options_t;
u32 argon2_module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = 32; // hard-coded in kernel
return kernel_threads_min;
}
u32 argon2_module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32; // hard-coded in kernel
return kernel_threads_max;
}
u64 argon2_module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u64 tmp_size = 0; // we'll add some later
return tmp_size;
}
const char *argon2_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel_user)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
const u32 device_processors = device_param->device_processors;
const u32 device_maxworkgroup_size = device_param->device_maxworkgroup_size;
const u64 fixed_mem = (256 * 1024 * 1024); // some storage we need for pws[], tmps[], and others. Is around 72MiB in reality.
const u64 spill_mem = 2048 * device_processors * device_maxworkgroup_size; // 1600 according to ptxas
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (fixed_mem + spill_mem);
u32 kernel_accel_new = device_processors;
if (kernel_accel_user)
{
kernel_accel_new = kernel_accel_user;
}
else
{
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == false))
{
kernel_accel_new = available_mem / size_per_accel;
kernel_accel_new = MIN (kernel_accel_new, 1024); // 1024 = max supported
}
}
char *new_device_name = hcstrdup (device_param->device_name);
for (size_t i = 0; i < strlen (new_device_name); i++)
{
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
return lines_buf;
}
u64 argon2_module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
const u64 size_argon2 = device_param->kernel_accel_max * size_per_accel;
return size_argon2;
}
u64 argon2_module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
{
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
const u32 parallelism = (options->parallelism) ? options->parallelism : options_st->parallelism;
for (u32 i = 1; i < hashes->salts_cnt; i++)
{
if ((memory_block_count != options[i].memory_block_count)
|| (parallelism != options[i].parallelism))
{
return (1ULL << 63) + i;
}
}
// now that we know they all have the same settings, we also need to check the self-test hash is different to what the user hash is using
if ((hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) == 0)
{
if ((memory_block_count != options_st->memory_block_count)
|| (parallelism != options_st->parallelism))
{
return (1ULL << 62);
}
}
u64 tmp_size = sizeof (argon2_tmp_t);
return tmp_size;
}
char *argon2_module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
char *jit_build_options = NULL;
hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
return jit_build_options;
}

View File

@ -11,9 +11,6 @@
#include "shared.h"
#include "memory.h"
#define ARGON2_SYNC_POINTS 4
#define ARGON2_BLOCK_SIZE 1024
static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL;
static const u32 DGST_POS0 = 0;
static const u32 DGST_POS1 = 1;
@ -48,28 +45,7 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig,
const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; }
const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; }
typedef struct argon2_tmp
{
u32 state[4]; // just something for now
} argon2_tmp_t;
typedef struct argon2_options
{
u32 type;
u32 version;
u32 iterations;
u32 parallelism;
u32 memory_usage_in_kib;
u32 segment_length;
u32 lane_length;
u32 memory_block_count;
u32 digest_len;
} argon2_options_t;
#include "argon2_common.c"
static const char *SIGNATURE_ARGON2D = "$argon2d$";
static const char *SIGNATURE_ARGON2I = "$argon2i$";
@ -82,140 +58,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
return esalt_size;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = 32; // hard-coded in kernel
return kernel_threads_min;
}
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 32; // hard-coded in kernel
return kernel_threads_max;
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u64 tmp_size = 0; // we'll add some later
return tmp_size;
}
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel_user)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
const u32 device_processors = device_param->device_processors;
const u32 device_maxworkgroup_size = device_param->device_maxworkgroup_size;
const u64 fixed_mem = (256 * 1024 * 1024); // some storage we need for pws[], tmps[], and others. Is around 72MiB in reality.
const u64 spill_mem = 2048 * device_processors * device_maxworkgroup_size; // 1600 according to ptxas
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (fixed_mem + spill_mem);
u32 kernel_accel_new = device_processors;
if (kernel_accel_user)
{
kernel_accel_new = kernel_accel_user;
}
else
{
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == false))
{
kernel_accel_new = available_mem / size_per_accel;
kernel_accel_new = MIN (kernel_accel_new, 1024); // 1024 = max supported
}
}
char *new_device_name = hcstrdup (device_param->device_name);
for (size_t i = 0; i < strlen (new_device_name); i++)
{
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
return lines_buf;
}
u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
const u64 size_argon2 = device_param->kernel_accel_max * size_per_accel;
return size_argon2;
}
u64 module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
{
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
const u32 parallelism = (options->parallelism) ? options->parallelism : options_st->parallelism;
for (u32 i = 1; i < hashes->salts_cnt; i++)
{
if ((memory_block_count != options[i].memory_block_count)
|| (parallelism != options[i].parallelism))
{
return (1ULL << 63) + i;
}
}
// now that we know they all have the same settings, we also need to check the self-test hash is different to what the user hash is using
if ((hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) == 0)
{
if ((memory_block_count != options_st->memory_block_count)
|| (parallelism != options_st->parallelism))
{
return (1ULL << 62);
}
}
u64 tmp_size = sizeof (argon2_tmp_t);
return tmp_size;
}
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
char *jit_build_options = NULL;
hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
return jit_build_options;
}
int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
{
u32 *digest = (u32 *) digest_buf;
@ -393,9 +235,9 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_dgst_size = module_dgst_size;
module_ctx->module_dictstat_disable = MODULE_DEFAULT;
module_ctx->module_esalt_size = module_esalt_size;
module_ctx->module_extra_buffer_size = module_extra_buffer_size;
module_ctx->module_extra_tmp_size = module_extra_tmp_size;
module_ctx->module_extra_tuningdb_block = module_extra_tuningdb_block;
module_ctx->module_extra_buffer_size = argon2_module_extra_buffer_size;
module_ctx->module_extra_tmp_size = argon2_module_extra_tmp_size;
module_ctx->module_extra_tuningdb_block = argon2_module_extra_tuningdb_block;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;
@ -421,14 +263,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_build_options = argon2_module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = argon2_module_kernel_threads_max;
module_ctx->module_kernel_threads_min = argon2_module_kernel_threads_min;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;
@ -447,7 +289,8 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_separator = MODULE_DEFAULT;
module_ctx->module_st_hash = module_st_hash;
module_ctx->module_st_pass = module_st_pass;
module_ctx->module_tmp_size = module_tmp_size;
module_ctx->module_tmp_size = argon2_module_tmp_size;
module_ctx->module_unstable_warning = MODULE_DEFAULT;
module_ctx->module_warmup_disable = MODULE_DEFAULT;
}