1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-29 03:18:30 +00:00

Unlock all GPU threads for AMD GPUs if WaveFront size is 32 (basically new models)

Add new hash-modes to tools/benchmark_deep.pl
Fix MINGW issue with the 64-bit constant in the refactored kernel-accel limiting section
This commit is contained in:
Jens Steube 2021-07-29 10:49:44 +02:00
parent a4299b74af
commit d38d40c8ba
2 changed files with 39 additions and 7 deletions

View File

@ -10411,11 +10411,21 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
} }
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
{ {
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple); if (device_param->kernel_preferred_wgs_multiple == 64)
{
// only older AMD GPUs with WaveFront size 64 benefit from this
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
}
} }
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{ {
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple); if (device_param->kernel_preferred_wgs_multiple == 64)
{
// only older AMD GPUs with WaveFront size 64 benefit from this
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
}
} }
} }
@ -10719,7 +10729,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
//hiprtc_options[1] = "--device-as-default-execution-space"; //hiprtc_options[1] = "--device-as-default-execution-space";
//hiprtc_options[2] = "--gpu-architecture"; //hiprtc_options[2] = "--gpu-architecture";
hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple); hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
//hiprtc_options[0] = "--gpu-max-threads-per-block=64"; //hiprtc_options[0] = "--gpu-max-threads-per-block=64";
hiprtc_options[1] = "-nocudainc"; hiprtc_options[1] = "-nocudainc";
@ -11804,7 +11814,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->device_name, device_param->device_name,
device_param->opencl_device_version, device_param->opencl_device_version,
device_param->opencl_driver_version, device_param->opencl_driver_version,
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple); (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
md5_ctx_t md5_ctx; md5_ctx_t md5_ctx;
@ -12139,7 +12149,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->vector_width, device_param->vector_width,
hashconfig->kern_type, hashconfig->kern_type,
extra_value, extra_value,
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple, (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX),
build_options_module_buf); build_options_module_buf);
md5_ctx_t md5_ctx; md5_ctx_t md5_ctx;
@ -14883,6 +14893,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
u32 kernel_accel_max = device_param->kernel_accel_max; u32 kernel_accel_max = device_param->kernel_accel_max;
// We need to deal with the situation that the total video RAM > total host RAM. // We need to deal with the situation that the total video RAM > total host RAM.
// For the opposite direction, we do that in the loop section below.
// Especially in multi-GPU setups that is very likely. // Especially in multi-GPU setups that is very likely.
// The buffers which actually take a lot of memory (except for SCRYPT) are the ones for the password candidates. // The buffers which actually take a lot of memory (except for SCRYPT) are the ones for the password candidates.
// They are stored in an aligned order for better performance, but this increases the memory pressure. // They are stored in an aligned order for better performance, but this increases the memory pressure.
@ -14893,7 +14904,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
// We need to hard-code some value, let's assume that (in 2021) the host has at least 8GB ram per active GPU // We need to hard-code some value, let's assume that (in 2021) the host has at least 8GB ram per active GPU
const u64 SIZE_8GB = 8UL * 1024 * 1024 * 1024; const u64 SIZE_8GB = 8ULL * 1024 * 1024 * 1024;
u64 accel_limit = SIZE_8GB; u64 accel_limit = SIZE_8GB;
@ -14909,6 +14920,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
accel_limit /= 3; accel_limit /= 3;
// It is possible that the GPU simply has too many hardware resources and 8GB per GPU is not enough, but on the other hand we can't go lower than 1
accel_limit = MAX (accel_limit, 1);
// I think vector size is not required because vector_size is dividing the pws_cnt in run_kernel() // I think vector size is not required because vector_size is dividing the pws_cnt in run_kernel()
kernel_accel_max = MIN (kernel_accel_max, accel_limit); kernel_accel_max = MIN (kernel_accel_max, accel_limit);
@ -14921,7 +14936,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
return -1; return -1;
} }
// find out if we would request too much memory on memory blocks which are based on kernel_accel // Opposite direction check: find out if we would request too much memory on memory blocks which are based on kernel_accel
u64 size_pws = 4; u64 size_pws = 4;
u64 size_pws_amp = 4; u64 size_pws_amp = 4;

View File

@ -230,16 +230,19 @@ my @hash_types =
13751, 13751,
13761, 13761,
13771, 13771,
13781,
13800, 13800,
13900, 13900,
14000, 14000,
14100, 14100,
14400, 14400,
14500,
14700, 14700,
14800, 14800,
14900, 14900,
15000, 15000,
15100, 15100,
15200,
15300, 15300,
15400, 15400,
15500, 15500,
@ -250,10 +253,13 @@ my @hash_types =
16200, 16200,
16300, 16300,
16400, 16400,
16500,
16600, 16600,
16700,
16800, 16800,
16801, 16801,
16900, 16900,
17210,
17300, 17300,
17400, 17400,
17500, 17500,
@ -333,12 +339,23 @@ my @hash_types =
24700, 24700,
24800, 24800,
24900, 24900,
25000,
25100,
25200,
25300, 25300,
25400, 25400,
25500, 25500,
25700,
25900, 25900,
26000, 26000,
26100, 26100,
26200,
26300,
26401,
26402,
26403,
26500,
26600,
); );
if (scalar @ARGV) if (scalar @ARGV)