1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-12-22 22:58:30 +00:00

Add 4 times single workitem extra buffer size to total extra buffer size to workaround single workitem buffer overflows

This commit is contained in:
Jens Steube 2021-04-19 10:27:51 +02:00
parent 5e9adac1c2
commit 8e47fdf8f5
6 changed files with 126 additions and 42 deletions

View File

@ -466,13 +466,12 @@ DEVICE_TYPE_GPU * 22700 1 N
## Find the ideal -n value, then store it here along with the proper compute device name.
## Formatting guidelines are available at the top of this document.
GeForce_GTX_980 * 8900 1 28 1
GeForce_GTX_980 * 9300 1 128 1
GeForce_GTX_980 * 15700 1 1 1
GeForce_GTX_980 * 22700 1 28 1
GeForce_RTX_2080_Ti * 8900 1 68 1
GeForce_RTX_2080_Ti * 8900 1 N 1
GeForce_RTX_2080_Ti * 9300 1 544 1
GeForce_RTX_2080_Ti * 15700 1 4 1
GeForce_RTX_2080_Ti * 22700 1 N 1

View File

@ -8360,7 +8360,20 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->extra_buffer_size = extra_buffer_size;
size_extra_buffer = extra_buffer_size;
// for the size we actually allocate we need to cheat a bit in order to make it easier for plugin developers.
//
// we will divide this size by 4 to work around an opencl limitation.
// this collides with a theoretical scenario (like -n1 -T1) where there's only one workitem,
// because inside the kernel the target buffer is selected by workitem_id / 4.
// but the maximum size of the buffer would be only 1/4 of what is needed -> overflow.
//
// to work around this we make sure that there's always a full buffer available in each of the 4 allocated buffers.
const u64 kernel_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads_max * device_param->kernel_accel_max;
const u64 extra_buffer_size_one = extra_buffer_size / kernel_power_max;
size_extra_buffer = extra_buffer_size + (extra_buffer_size_one * 4);
}
// kern type

View File

@ -58,12 +58,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
return true;
}
// w_opencl_runtime_p_2021.2.0.616.exe: password not found
if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
{
return true;
}
return false;
}
@ -132,6 +126,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// it now depends on the kernel-accel value (from which scrypt and similar algorithms benefit), with a hard minimum of 64mb and a hard maximum of 1024mb
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
@ -154,7 +171,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks;
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;

View File

@ -115,6 +115,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// it now depends on the kernel-accel value (from which scrypt and similar algorithms benefit), with a hard minimum of 64mb and a hard maximum of 1024mb
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
@ -137,7 +160,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks;
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;

View File

@ -130,6 +130,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// it now depends on the kernel-accel value (from which scrypt and similar algorithms benefit), with a hard minimum of 64mb and a hard maximum of 1024mb
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
@ -152,7 +175,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks;
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;
@ -212,26 +238,6 @@ u64 module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UN
return tmp_size;
}
// Decides, from device_param alone, whether this module reports the device as unstable
// (presumably the caller uses a true result to warn about or skip the device - confirm against caller).
// Returns true when:
//   - the OpenCL platform vendor is VENDOR_ID_APPLE and the device is a VENDOR_ID_INTEL_SDK GPU, or
//   - the device is a VENDOR_ID_INTEL_SDK CPU (on any platform).
// Returns false in every other case. The remaining parameters are not read.
bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
{
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
{
// Invalid extra buffer size.
if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
{
return true;
}
}
// w_opencl_runtime_p_2021.2.0.616.exe: password not found
// (this check is independent of the platform vendor test above)
if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU))
{
return true;
}
return false;
}
bool module_jit_cache_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
return true;
@ -503,6 +509,6 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_st_hash = module_st_hash;
module_ctx->module_st_pass = module_st_pass;
module_ctx->module_tmp_size = module_tmp_size;
module_ctx->module_unstable_warning = module_unstable_warning;
module_ctx->module_unstable_warning = MODULE_DEFAULT;
module_ctx->module_warmup_disable = module_warmup_disable;
}

View File

@ -53,14 +53,8 @@ static const u64 SCRYPT_P = 1;
bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
{
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
{
// Invalid extra buffer size.
return true;
}
// w_opencl_runtime_p_2021.2.0.616.exe: password not found
if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU))
// amdgpu-pro-20.50-1234664-ubuntu-20.04 (rocr): password not found
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
{
return true;
}
@ -133,6 +127,29 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
const u64 size_hooks = kernel_power_max * hashconfig->hook_size;
u64 size_pws_pre = 4;
u64 size_pws_base = 4;
if (user_options->slow_candidates == true)
{
// size_pws_pre
size_pws_pre = kernel_power_max * sizeof (pw_pre_t);
// size_pws_base
size_pws_base = kernel_power_max * sizeof (pw_pre_t);
}
// sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
// let's add some extra space just to be sure.
// it now depends on the kernel-accel value (from which scrypt and similar algorithms benefit), with a hard minimum of 64mb and a hard maximum of 1024mb
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * device_param->kernel_accel_max;
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
const u64 scrypt_extra_space
= device_param->size_bfs
+ device_param->size_combs
@ -155,7 +172,10 @@ u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
+ size_pws_comp
+ size_pws_idx
+ size_tmps
+ size_hooks;
+ size_hooks
+ size_pws_pre
+ size_pws_base
+ EXTRA_SPACE;
bool not_enough_memory = true;