From b98d5d5f8a81230c712d6602ccbd84ba9f9bdad5 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Tue, 8 Jul 2025 13:21:10 +0200 Subject: [PATCH] Fixed out-of-bounds read for -a 9 when using the new OPTS_TYPE_THREAD_MULTI_DISABLE parameter. This only affected Argon2. Fixed compiler warnings in inc_hash_argon2.cl. Moved argon2_tmp_t and argon2_options_t typedefs from argon2_common.c back to the module to allow plugin developers to modify them when using Argon2 as a primitive. Slightly improved autotune behavior for edge cases such as 8700 and 18600, where some algorithms started with a theoretically excessive value, leaving no room for proper tuning. Removed argon2_module_kernel_threads_min() and argon2_module_kernel_threads_max() from argon2_common.c. Switched to using OPTS_TYPE_NATIVE_THREADS instead. Plugin developers can still use it. This simplifies CPU integration, as CPUs typically run with a single thread. Updated plugins 15500 and 20510. Added a thread limit to prevent autotune from selecting an excessively high thread count. The issue originated from the runtime returning an unrealistically high ideal thread count. 
--- OpenCL/inc_hash_argon2.cl | 6 +++--- OpenCL/inc_types.h | 6 +++++- OpenCL/m34000-pure.cl | 2 +- src/autotune.c | 34 ++++++++++++++++++++++++++++++++++ src/modules/argon2_common.c | 37 ------------------------------------- src/modules/module_15500.c | 9 ++++++++- src/modules/module_20510.c | 9 ++++++++- src/modules/module_34000.c | 28 ++++++++++++++++++++++++++-- 8 files changed, 85 insertions(+), 46 deletions(-) diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl index c87179d2c..f9aba1cef 100644 --- a/OpenCL/inc_hash_argon2.cl +++ b/OpenCL/inc_hash_argon2.cl @@ -43,7 +43,7 @@ DECLSPEC void argon2_initial_block (PRIVATE_AS const u32 *in, const u32 lane, co for (u32 idx = 0; idx < 8; idx++) blake_buf[idx] = ctx.h[idx]; blake2b_init (&ctx); - blake2b_transform (ctx.h, blake_buf, 64, BLAKE2B_FINAL); + blake2b_transform (ctx.h, blake_buf, 64, (u64) BLAKE2B_FINAL); out[off + 0] = ctx.h[0]; out[off + 1] = ctx.h[1]; @@ -279,7 +279,7 @@ DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index) return array[3]; } - return -1; + return (u32) -1; } DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 argon2_thread) @@ -386,7 +386,7 @@ DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const a blake2b_init (&ctx); // Override default (0x40) value in BLAKE2b - ctx.h[0] ^= 0x40 ^ options->digest_len; + ctx.h[0] ^= 0x40 ^ options->digest_len; blake2b_update (&ctx, output_len, 4); blake2b_update (&ctx, (PRIVATE_AS u32 *) final_block.values, sizeof(final_block)); diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index a13c89b8e..e5a0516e1 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -11,11 +11,13 @@ #define BITMAP_SHIFT1 kernel_param->bitmap_shift1 #define BITMAP_SHIFT2 kernel_param->bitmap_shift2 #define SALT_POS_HOST (kernel_param->pws_pos + gid) +#define SALT_POS_HOST_BID (kernel_param->pws_pos 
+ bid) #define LOOP_POS kernel_param->loop_pos #define LOOP_CNT kernel_param->loop_cnt #define IL_CNT kernel_param->il_cnt #define DIGESTS_CNT 1 -#define DIGESTS_OFFSET_HOST (kernel_param->pws_pos + gid) +#define DIGESTS_OFFSET_HOST (kernel_param->pws_pos + gid) +#define DIGESTS_OFFSET_HOST_BID (kernel_param->pws_pos + bid) #define COMBS_MODE kernel_param->combs_mode #define SALT_REPEAT kernel_param->salt_repeat #define PWS_POS kernel_param->pws_pos @@ -25,11 +27,13 @@ #define BITMAP_SHIFT1 kernel_param->bitmap_shift1 #define BITMAP_SHIFT2 kernel_param->bitmap_shift2 #define SALT_POS_HOST kernel_param->salt_pos_host +#define SALT_POS_HOST_BID SALT_POS_HOST #define LOOP_POS kernel_param->loop_pos #define LOOP_CNT kernel_param->loop_cnt #define IL_CNT kernel_param->il_cnt #define DIGESTS_CNT kernel_param->digests_cnt #define DIGESTS_OFFSET_HOST kernel_param->digests_offset_host +#define DIGESTS_OFFSET_HOST_BID DIGESTS_OFFSET_HOST #define COMBS_MODE kernel_param->combs_mode #define SALT_REPEAT kernel_param->salt_repeat #define PWS_POS kernel_param->pws_pos diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl index ba87d835c..688291972 100644 --- a/OpenCL/m34000-pure.cl +++ b/OpenCL/m34000-pure.cl @@ -86,7 +86,7 @@ KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2 GLOBAL_AS argon2_extra_t *argon2_extra = V + bd4; - argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST]; + argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST_BID]; options.parallelism = ARGON2_PARALLELISM; diff --git a/src/autotune.c b/src/autotune.c index e8f704599..cf233a88f 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -403,6 +403,40 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param } } + if (1) + { + // some algorithm start ways to high with these theoretical preset (for instance, 8700) + // so much that they can't be tuned anymore + + while ((kernel_accel > kernel_accel_min) || (kernel_threads > kernel_threads_min) 
|| (kernel_loops > kernel_loops_min)) + { + double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2); + + if (exec_msec < target_msec / 16) break; + + if (kernel_accel > kernel_accel_min) + { + kernel_accel = MAX (kernel_accel / 2, kernel_accel_min); + + continue; + } + + if (kernel_threads > kernel_threads_min) + { + kernel_threads = MAX (kernel_threads / 2, kernel_threads_min); + + continue; + } + + if (kernel_loops > kernel_loops_min) + { + kernel_loops = MAX (kernel_loops / 2, kernel_loops_min); + + continue; + } + } + } + for (u32 kernel_loops_test = kernel_loops; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1) { double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops_test, kernel_threads, 2); diff --git a/src/modules/argon2_common.c b/src/modules/argon2_common.c index cfe47071e..e6ebf9e1a 100644 --- a/src/modules/argon2_common.c +++ b/src/modules/argon2_common.c @@ -15,43 +15,6 @@ #define ARGON2_SYNC_POINTS 4 #define ARGON2_BLOCK_SIZE 1024 -typedef struct argon2_tmp -{ - u32 state[4]; // just something for now - -} argon2_tmp_t; - -typedef struct argon2_options -{ - u32 type; - u32 version; - - u32 iterations; - u32 parallelism; - u32 memory_usage_in_kib; - - u32 segment_length; - u32 lane_length; - u32 memory_block_count; - - u32 digest_len; - -} argon2_options_t; - -u32 argon2_module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 32; // hard-coded in kernel - - return kernel_threads_min; -} - -u32 argon2_module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = 32; // hard-coded in kernel - - return kernel_threads_max; -} - u64 
argon2_module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 tmp_size = 0; // we'll add some later diff --git a/src/modules/module_15500.c b/src/modules/module_15500.c index cd8f4639b..720b6a785 100644 --- a/src/modules/module_15500.c +++ b/src/modules/module_15500.c @@ -67,6 +67,13 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED return esalt_size; } +u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + u32 kernel_threads_max = 256; + + return kernel_threads_max; +} + u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const bool optimized_kernel = (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL); @@ -328,7 +335,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = module_kernel_threads_max; module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; diff --git a/src/modules/module_20510.c b/src/modules/module_20510.c index 394a69a3b..e92d96d6c 100644 --- a/src/modules/module_20510.c +++ b/src/modules/module_20510.c @@ -115,6 +115,13 @@ u32 module_hashes_count_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_ return tmp_size; } +u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const 
user_options_extra_t *user_options_extra) +{ + u32 kernel_threads_max = 256; + + return kernel_threads_max; +} + u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 tmp_size = (const u64) sizeof (pkzip_extra_t); @@ -249,7 +256,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = module_kernel_threads_max; module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; diff --git a/src/modules/module_34000.c b/src/modules/module_34000.c index 34b621133..39d73df80 100644 --- a/src/modules/module_34000.c +++ b/src/modules/module_34000.c @@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_SLOW_HASH_DIMY_LOOP; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_THREAD_MULTI_DISABLE | OPTS_TYPE_MP_MULTI_DISABLE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; @@ -45,6 +46,29 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } +typedef struct argon2_tmp +{ + u32 state[4]; // just something + +} argon2_tmp_t; + +typedef struct argon2_options +{ + u32 type; + u32 version; + + u32 
iterations; + u32 parallelism; + u32 memory_usage_in_kib; + + u32 segment_length; + u32 lane_length; + u32 memory_block_count; + + u32 digest_len; + +} argon2_options_t; + #include "argon2_common.c" static const char *SIGNATURE_ARGON2D = "$argon2d$"; @@ -269,8 +293,8 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = argon2_module_kernel_threads_max; - module_ctx->module_kernel_threads_min = argon2_module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type;