mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-29 03:18:30 +00:00
Unlock all GPU threads for AMD GPUs if WaveFront size is 32 (basically new models)
Add new hash-modes to tools/benchmark_deep.pl. Fix MINGW issue with a 64-bit constant in the refactored kernel-accel limiting section.
This commit is contained in:
parent
a4299b74af
commit
d38d40c8ba
@ -10411,11 +10411,21 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param)
|
|||||||
}
|
}
|
||||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
|
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||||
{
|
{
|
||||||
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
|
if (device_param->kernel_preferred_wgs_multiple == 64)
|
||||||
|
{
|
||||||
|
// only older AMD GPUs with WaveFront size 64 benefit from this
|
||||||
|
|
||||||
|
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||||
{
|
{
|
||||||
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
|
if (device_param->kernel_preferred_wgs_multiple == 64)
|
||||||
|
{
|
||||||
|
// only older AMD GPUs with WaveFront size 64 benefit from this
|
||||||
|
|
||||||
|
kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10719,7 +10729,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
|
|||||||
//hiprtc_options[1] = "--device-as-default-execution-space";
|
//hiprtc_options[1] = "--device-as-default-execution-space";
|
||||||
//hiprtc_options[2] = "--gpu-architecture";
|
//hiprtc_options[2] = "--gpu-architecture";
|
||||||
|
|
||||||
hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple);
|
hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
|
||||||
|
|
||||||
//hiprtc_options[0] = "--gpu-max-threads-per-block=64";
|
//hiprtc_options[0] = "--gpu-max-threads-per-block=64";
|
||||||
hiprtc_options[1] = "-nocudainc";
|
hiprtc_options[1] = "-nocudainc";
|
||||||
@ -11804,7 +11814,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->device_name,
|
device_param->device_name,
|
||||||
device_param->opencl_device_version,
|
device_param->opencl_device_version,
|
||||||
device_param->opencl_driver_version,
|
device_param->opencl_driver_version,
|
||||||
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple);
|
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
|
||||||
|
|
||||||
md5_ctx_t md5_ctx;
|
md5_ctx_t md5_ctx;
|
||||||
|
|
||||||
@ -12139,7 +12149,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
device_param->vector_width,
|
device_param->vector_width,
|
||||||
hashconfig->kern_type,
|
hashconfig->kern_type,
|
||||||
extra_value,
|
extra_value,
|
||||||
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple,
|
(user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX),
|
||||||
build_options_module_buf);
|
build_options_module_buf);
|
||||||
|
|
||||||
md5_ctx_t md5_ctx;
|
md5_ctx_t md5_ctx;
|
||||||
@ -14883,6 +14893,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
u32 kernel_accel_max = device_param->kernel_accel_max;
|
u32 kernel_accel_max = device_param->kernel_accel_max;
|
||||||
|
|
||||||
// We need to deal with the situation that the total video RAM > total host RAM.
|
// We need to deal with the situation that the total video RAM > total host RAM.
|
||||||
|
// For the opposite direction, we do that in the loop section below.
|
||||||
// Especially in multi-GPU setups that is very likely.
|
// Especially in multi-GPU setups that is very likely.
|
||||||
// The buffers which actually take a lot of memory (except for SCRYPT) are the ones for the password candidates.
|
// The buffers which actually take a lot of memory (except for SCRYPT) are the ones for the password candidates.
|
||||||
// They are stored in an aligned order for better performance, but this increases the memory pressure.
|
// They are stored in an aligned order for better performance, but this increases the memory pressure.
|
||||||
@ -14893,7 +14904,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
// We need to hard-code some value, let's assume that (in 2021) the host has at least 8GB ram per active GPU
|
// We need to hard-code some value, let's assume that (in 2021) the host has at least 8GB ram per active GPU
|
||||||
|
|
||||||
const u64 SIZE_8GB = 8UL * 1024 * 1024 * 1024;
|
const u64 SIZE_8GB = 8ULL * 1024 * 1024 * 1024;
|
||||||
|
|
||||||
u64 accel_limit = SIZE_8GB;
|
u64 accel_limit = SIZE_8GB;
|
||||||
|
|
||||||
@ -14909,6 +14920,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
accel_limit /= 3;
|
accel_limit /= 3;
|
||||||
|
|
||||||
|
// It is possible that the GPU simply has too many hardware resources and 8GB per GPU is not enough, but OTOH we can't go lower than 1
|
||||||
|
|
||||||
|
accel_limit = MAX (accel_limit, 1);
|
||||||
|
|
||||||
// I think vector size is not required because vector_size is dividing the pws_cnt in run_kernel()
|
// I think vector size is not required because vector_size is dividing the pws_cnt in run_kernel()
|
||||||
|
|
||||||
kernel_accel_max = MIN (kernel_accel_max, accel_limit);
|
kernel_accel_max = MIN (kernel_accel_max, accel_limit);
|
||||||
@ -14921,7 +14936,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// find out if we would request too much memory on memory blocks which are based on kernel_accel
|
// Opposite direction check: find out if we would request too much memory on memory blocks which are based on kernel_accel
|
||||||
|
|
||||||
u64 size_pws = 4;
|
u64 size_pws = 4;
|
||||||
u64 size_pws_amp = 4;
|
u64 size_pws_amp = 4;
|
||||||
|
@ -230,16 +230,19 @@ my @hash_types =
|
|||||||
13751,
|
13751,
|
||||||
13761,
|
13761,
|
||||||
13771,
|
13771,
|
||||||
|
13781,
|
||||||
13800,
|
13800,
|
||||||
13900,
|
13900,
|
||||||
14000,
|
14000,
|
||||||
14100,
|
14100,
|
||||||
14400,
|
14400,
|
||||||
|
14500,
|
||||||
14700,
|
14700,
|
||||||
14800,
|
14800,
|
||||||
14900,
|
14900,
|
||||||
15000,
|
15000,
|
||||||
15100,
|
15100,
|
||||||
|
15200,
|
||||||
15300,
|
15300,
|
||||||
15400,
|
15400,
|
||||||
15500,
|
15500,
|
||||||
@ -250,10 +253,13 @@ my @hash_types =
|
|||||||
16200,
|
16200,
|
||||||
16300,
|
16300,
|
||||||
16400,
|
16400,
|
||||||
|
16500,
|
||||||
16600,
|
16600,
|
||||||
|
16700,
|
||||||
16800,
|
16800,
|
||||||
16801,
|
16801,
|
||||||
16900,
|
16900,
|
||||||
|
17210,
|
||||||
17300,
|
17300,
|
||||||
17400,
|
17400,
|
||||||
17500,
|
17500,
|
||||||
@ -333,12 +339,23 @@ my @hash_types =
|
|||||||
24700,
|
24700,
|
||||||
24800,
|
24800,
|
||||||
24900,
|
24900,
|
||||||
|
25000,
|
||||||
|
25100,
|
||||||
|
25200,
|
||||||
25300,
|
25300,
|
||||||
25400,
|
25400,
|
||||||
25500,
|
25500,
|
||||||
|
25700,
|
||||||
25900,
|
25900,
|
||||||
26000,
|
26000,
|
||||||
26100,
|
26100,
|
||||||
|
26200,
|
||||||
|
26300,
|
||||||
|
26401,
|
||||||
|
26402,
|
||||||
|
26403,
|
||||||
|
26500,
|
||||||
|
26600,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (scalar @ARGV)
|
if (scalar @ARGV)
|
||||||
|
Loading…
Reference in New Issue
Block a user