From 8e47fdf8f5c535e350daa4b4b36b2153c206d6f8 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Mon, 19 Apr 2021 10:27:51 +0200 Subject: [PATCH] Add 4 times single workitem extra buffer size to total extra buffer size to workaround single workitem buffer overflows --- hashcat.hctune | 3 +-- src/backend.c | 15 +++++++++++- src/modules/module_08900.c | 34 ++++++++++++++++++++------ src/modules/module_09300.c | 28 ++++++++++++++++++++- src/modules/module_15700.c | 50 +++++++++++++++++++++----------------- src/modules/module_22700.c | 38 ++++++++++++++++++++++------- 6 files changed, 126 insertions(+), 42 deletions(-) diff --git a/hashcat.hctune b/hashcat.hctune index de055ea92..ee3446be1 100644 --- a/hashcat.hctune +++ b/hashcat.hctune @@ -466,13 +466,12 @@ DEVICE_TYPE_GPU * 22700 1 N ## Find the ideal -n value, then store it here along with the proper compute device name. ## Formatting guidelines are availabe at the top of this document. - GeForce_GTX_980 * 8900 1 28 1 GeForce_GTX_980 * 9300 1 128 1 GeForce_GTX_980 * 15700 1 1 1 GeForce_GTX_980 * 22700 1 28 1 -GeForce_RTX_2080_Ti * 8900 1 68 1 +GeForce_RTX_2080_Ti * 8900 1 N 1 GeForce_RTX_2080_Ti * 9300 1 544 1 GeForce_RTX_2080_Ti * 15700 1 4 1 GeForce_RTX_2080_Ti * 22700 1 N 1 diff --git a/src/backend.c b/src/backend.c index dbb8300e6..583d0712d 100644 --- a/src/backend.c +++ b/src/backend.c @@ -8360,7 +8360,20 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->extra_buffer_size = extra_buffer_size; - size_extra_buffer = extra_buffer_size; + // for the size we actually allocate we need to cheat a bit in order to make it easier for plugin developers. + // + // we will divide this size by 4 to work around an opencl limitation. + // this collides with a theoretical scenario (like -n1 -T1) where there's only one workitem, + // because inside the kernel the target buffer is selected by workitem_id / 4. + // but the maximum size of the buffer would be only 1/4 of what is needed -> overflow. 
+ // + // to work around this we make sure that there's always a full buffer available in each of the 4 allocated buffers. + + const u64 kernel_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max; + + const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max; + + size_extra_buffer = extra_buffer_size + (extra_buffer_size_one * 4); } // kern type diff --git a/src/modules/module_08900.c b/src/modules/module_08900.c index 03de4b089..277b90330 100644 --- a/src/modules/module_08900.c +++ b/src/modules/module_08900.c @@ -58,12 +58,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE return true; } - // w_opencl_runtime_p_2021.2.0.616.exe: password not found - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) - { - return true; - } - return false; } @@ -132,6 +126,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 size_hooks = kernel_power_max * hashconfig->hook_size; + u64 size_pws_pre = 4; + u64 size_pws_base = 4; + + if (user_options->slow_candidates == true) + { + // size_pws_pre + + size_pws_pre = kernel_power_max * sizeof (pw_pre_t); + + // size_pws_base + + size_pws_base = kernel_power_max * sizeof (pw_pre_t); + } + + // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. + // let's add some extra space just to be sure. 
+ // the extra space scales with the kernel-accel value (scrypt and similar algorithms benefit from this), with a hard minimum of 64 MiB and a hard maximum of 1024 MiB + + u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; + + EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); + EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); + const u64 scrypt_extra_space = device_param->size_bfs + device_param->size_combs @@ -154,7 +171,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE + size_pws_comp + size_pws_idx + size_tmps - + size_hooks; + + size_hooks + + size_pws_pre + + size_pws_base + + EXTRA_SPACE; bool not_enough_memory = true; diff --git a/src/modules/module_09300.c b/src/modules/module_09300.c index 19b3456aa..73b130663 100644 --- a/src/modules/module_09300.c +++ b/src/modules/module_09300.c @@ -115,6 +115,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 size_hooks = kernel_power_max * hashconfig->hook_size; + u64 size_pws_pre = 4; + u64 size_pws_base = 4; + + if (user_options->slow_candidates == true) + { + // size_pws_pre + + size_pws_pre = kernel_power_max * sizeof (pw_pre_t); + + // size_pws_base + + size_pws_base = kernel_power_max * sizeof (pw_pre_t); + } + + // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. + // let's add some extra space just to be sure. 
+ // the extra space scales with the kernel-accel value (scrypt and similar algorithms benefit from this), with a hard minimum of 64 MiB and a hard maximum of 1024 MiB + + u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; + + EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); + EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); + const u64 scrypt_extra_space = device_param->size_bfs + device_param->size_combs @@ -137,7 +160,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE + size_pws_comp + size_pws_idx + size_tmps - + size_hooks; + + size_hooks + + size_pws_pre + + size_pws_base + + EXTRA_SPACE; bool not_enough_memory = true; diff --git a/src/modules/module_15700.c b/src/modules/module_15700.c index 7ea3dffbb..4b473410e 100644 --- a/src/modules/module_15700.c +++ b/src/modules/module_15700.c @@ -130,6 +130,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 size_hooks = kernel_power_max * hashconfig->hook_size; + u64 size_pws_pre = 4; + u64 size_pws_base = 4; + + if (user_options->slow_candidates == true) + { + // size_pws_pre + + size_pws_pre = kernel_power_max * sizeof (pw_pre_t); + + // size_pws_base + + size_pws_base = kernel_power_max * sizeof (pw_pre_t); + } + + // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. + // let's add some extra space just to be sure. 
+ // the extra space scales with the kernel-accel value (scrypt and similar algorithms benefit from this), with a hard minimum of 64 MiB and a hard maximum of 1024 MiB + + u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; + + EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); + EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); + const u64 scrypt_extra_space = device_param->size_bfs + device_param->size_combs @@ -152,7 +175,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE + size_pws_comp + size_pws_idx + size_tmps - + size_hooks; + + size_hooks + + size_pws_pre + + size_pws_base + + EXTRA_SPACE; bool not_enough_memory = true; @@ -212,26 +238,6 @@ u64 module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UN return tmp_size; } -bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - // Invalid extra buffer size. 
- if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) - { - return true; - } - } - - // w_opencl_runtime_p_2021.2.0.616.exe: password not found - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)) - { - return true; - } - - return false; -} - bool module_jit_cache_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { return true; @@ -503,6 +509,6 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_st_hash = module_st_hash; module_ctx->module_st_pass = module_st_pass; module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = module_unstable_warning; + module_ctx->module_unstable_warning = MODULE_DEFAULT; module_ctx->module_warmup_disable = module_warmup_disable; } diff --git a/src/modules/module_22700.c b/src/modules/module_22700.c index 4dccfd560..f866bc235 100644 --- a/src/modules/module_22700.c +++ b/src/modules/module_22700.c @@ -53,14 +53,8 @@ static const u64 SCRYPT_P = 1; bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) { - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - // Invalid extra buffer size. 
- return true; - } - - // w_opencl_runtime_p_2021.2.0.616.exe: password not found - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)) + // amdgpu-pro-20.50-1234664-ubuntu-20.04 (rocr): password not found + if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { return true; } @@ -133,6 +127,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE const u64 size_hooks = kernel_power_max * hashconfig->hook_size; + u64 size_pws_pre = 4; + u64 size_pws_base = 4; + + if (user_options->slow_candidates == true) + { + // size_pws_pre + + size_pws_pre = kernel_power_max * sizeof (pw_pre_t); + + // size_pws_base + + size_pws_base = kernel_power_max * sizeof (pw_pre_t); + } + + // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. + // let's add some extra space just to be sure. + // the extra space scales with the kernel-accel value (scrypt and similar algorithms benefit from this), with a hard minimum of 64 MiB and a hard maximum of 1024 MiB + + u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max; + + EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL)); + EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL)); + const u64 scrypt_extra_space = device_param->size_bfs + device_param->size_combs @@ -155,7 +172,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE + size_pws_comp + size_pws_idx + size_tmps - + size_hooks; + + size_hooks + + size_pws_pre + + size_pws_base + + EXTRA_SPACE; bool not_enough_memory = true;