mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-24 15:38:47 +00:00
Increase default iteration count per kernel invocation from 1024 to 2048
Add support for lower iteration counts per kernel invocation than the default, enabling TMTO for low scrypt configurations, such as N=1024 Use TMTO 2 if it reaches 4 times the device processor count, instead of TMTO 1 always Improve performance for low scrypt configurations (hash-mode 9300) Fix unit test for 15700 with correct scrypt configurations Disable CPU over subscription for SCRYPT based algorithms
This commit is contained in:
parent
4d2485db0f
commit
e134564a73
@ -345,9 +345,11 @@ DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, PRIVATE_AS uint4 *T, GLOBAL
|
||||
case 3: V = V3; break;
|
||||
}
|
||||
|
||||
// note: fixed 1024 iterations = forced -u 1024
|
||||
// note: max 2048 iterations = forced -u 2048
|
||||
|
||||
for (u32 N_pos = 0; N_pos < 1024; N_pos++)
|
||||
const u32 N_max = (2048 > ySIZE) ? ySIZE : 2048;
|
||||
|
||||
for (u32 N_pos = 0; N_pos < N_max; N_pos++)
|
||||
{
|
||||
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);
|
||||
|
||||
|
@ -26,8 +26,20 @@ inline __device__ uint4 operator + (const uint4 a, const uint4 b) { return mak
|
||||
inline __device__ uint4 operator ^ (const uint4 a, const uint4 b) { return make_uint4 ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); }
|
||||
inline __device__ uint4 operator | (const uint4 a, const uint4 b) { return make_uint4 ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); }
|
||||
inline __device__ void operator ^= ( uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
|
||||
#endif
|
||||
|
||||
inline __device__ uint4 rotate (const uint4 a, const int n) { return ((a << n) | ((a >> (32 - n)))); }
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
inline __device__ uint4 rotate (const uint4 a, const int n)
|
||||
{
|
||||
uint4 r;
|
||||
|
||||
r.x = hc_rotl32_S (r.x, n);
|
||||
r.y = hc_rotl32_S (r.y, n);
|
||||
r.z = hc_rotl32_S (r.z, n);
|
||||
r.w = hc_rotl32_S (r.w, n);
|
||||
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -10,14 +10,14 @@
|
||||
|
||||
u32 scrypt_module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u32 kernel_loops_min = 1024;
|
||||
const u32 kernel_loops_min = 2048;
|
||||
|
||||
return kernel_loops_min;
|
||||
}
|
||||
|
||||
u32 scrypt_module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u32 kernel_loops_max = 1024;
|
||||
const u32 kernel_loops_max = 2048;
|
||||
|
||||
return kernel_loops_max;
|
||||
}
|
||||
@ -84,7 +84,8 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t
|
||||
}
|
||||
else
|
||||
{
|
||||
// find a nice kernel_accel programmatically
|
||||
// find a nice kernel_accel for gpus programmatically
|
||||
// on cpus there's no need for over subscription with scrypt
|
||||
|
||||
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
@ -92,45 +93,51 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t
|
||||
{
|
||||
const float multi = (float) available_mem / size_per_accel;
|
||||
|
||||
int accel_multi;
|
||||
|
||||
for (accel_multi = 1; accel_multi <= 2; accel_multi++)
|
||||
if ((multi * 2) >= device_processors)
|
||||
{
|
||||
kernel_accel_new = multi * (1 << accel_multi);
|
||||
kernel_accel_new = multi * 2;
|
||||
kernel_accel_new -= 2;
|
||||
|
||||
if (kernel_accel_new >= device_processors) break;
|
||||
if ((multi * 4) >= device_processors * 2)
|
||||
{
|
||||
kernel_accel_new = multi * 4;
|
||||
kernel_accel_new -= 4;
|
||||
}
|
||||
}
|
||||
else if ((multi * 4) >= device_processors)
|
||||
{
|
||||
kernel_accel_new = multi * 4;
|
||||
kernel_accel_new -= 4;
|
||||
}
|
||||
|
||||
// we need some space for tmps[], ...
|
||||
|
||||
kernel_accel_new -= (1 << accel_multi);
|
||||
|
||||
// clamp if close to device processors -- 16% seems fine on a 2080ti, and on a 4090
|
||||
|
||||
if ((kernel_accel_new > device_processors) && ((device_processors * 1.16) > kernel_accel_new))
|
||||
if (kernel_accel_new > device_processors)
|
||||
{
|
||||
kernel_accel_new = device_processors;
|
||||
const u32 extra = kernel_accel_new % device_processors;
|
||||
|
||||
if (extra < (device_processors * 0.16))
|
||||
{
|
||||
kernel_accel_new -= extra;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 1; i <= 8; i++)
|
||||
u64 multi = available_mem / size_per_accel;
|
||||
|
||||
if (tmto == 0)
|
||||
{
|
||||
if ((size_per_accel * device_processors * i) < available_mem)
|
||||
{
|
||||
kernel_accel_new = device_processors * i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 1; i <= 8; i++)
|
||||
{
|
||||
if ((size_per_accel * device_processors * i) < available_mem)
|
||||
{
|
||||
kernel_accel_new = device_processors * i;
|
||||
tmto = 2; // we radically assign tmto = 2, since most gpus seem to enjoy that tmto
|
||||
|
||||
multi = available_mem / (size_per_accel >> tmto);
|
||||
}
|
||||
|
||||
multi /= device_processors;
|
||||
multi -= 4;
|
||||
multi = MIN (16, multi);
|
||||
|
||||
kernel_accel_new = device_processors * multi; // should be safe because of tmto
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -144,9 +151,6 @@ const char *scrypt_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t
|
||||
|
||||
for (u32 tmto_new = tmto_start; tmto_new <= tmto_stop; tmto_new++)
|
||||
{
|
||||
// we have 1024 hard-coded in the kernel
|
||||
if ((scrypt_N / (1 << tmto_new)) < 1024) continue;
|
||||
|
||||
// global memory check
|
||||
if (available_mem < (kernel_accel_new * (size_per_accel >> tmto_new))) continue;
|
||||
|
||||
@ -187,14 +191,6 @@ u64 scrypt_module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig
|
||||
|
||||
u64 size_scrypt = size_per_accel * device_param->kernel_accel_max;
|
||||
|
||||
// We must maintain at least 1024 iteration it's hard-coded in the kernel
|
||||
if ((scrypt_N / (1 << tmto)) < 1024)
|
||||
{
|
||||
fprintf (stderr, "ERROR: SCRYPT-N parameter too low. Invalid tmto specified?\n");
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return size_scrypt / (1 << tmto);
|
||||
}
|
||||
|
||||
|
@ -17,8 +17,8 @@ sub module_generate_hash
|
||||
{
|
||||
my $word = shift;
|
||||
my $salt = shift;
|
||||
my $scrypt_N = shift || 1024 ; # 262144 originally
|
||||
my $scrypt_r = shift || 1; # 8 originally
|
||||
my $scrypt_N = shift || 262144;
|
||||
my $scrypt_r = shift || 8;
|
||||
my $scrypt_p = shift || 1;
|
||||
my $ciphertext = shift || random_bytes (32);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user