Mirror of https://github.com/hashcat/hashcat.git, synced 2025-07-06 14:52:35 +00:00

Do not adjust kernel-accel or scrypt-tmto on GPUs with unified memory, typically integrated GPUs in CPUs (APUs). Redesign the "4-buffer" strategy so that naive division by four no longer overallocates, which could significantly increase memory usage for high scrypt configurations (e.g. 256k:8:1). Update the scrypt B[] access pattern to match the new "4-buffer" design. Allow kernel-accel and scrypt-tmto to be specified by the user, individually or together, via the command line or the tuning database; any parameter left unspecified is still adjusted automatically. Permit user-defined combinations of scrypt-tmto and kernel-accel even if they would exceed available memory.
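To make the memory pressure behind these tuning choices concrete, here is a minimal, standalone sketch (plain host-side C, not hashcat code; the helper name scrypt_vram_per_candidate is made up for illustration). Scrypt needs roughly 128 * r * N bytes of V[] storage per password candidate, a time-memory trade-off of tmto cuts that by 2^tmto, and kernel-accel multiplies the total by the number of candidates kept in flight, which is why a 256k:8:1 configuration is so demanding.

#include <stdio.h>
#include <inttypes.h>

// Illustrative only: rough scrypt memory math, not a hashcat function.
// Bytes of V[] storage per password candidate: 128 * r * N, reduced by
// 2^tmto when a time-memory trade-off is applied.
static uint64_t scrypt_vram_per_candidate (const uint64_t N, const uint64_t r, const uint32_t tmto)
{
  return (128ULL * r * N) >> tmto;
}

int main (void)
{
  const uint64_t N = 262144; // the "256k" in 256k:8:1
  const uint64_t r = 8;

  for (uint32_t tmto = 0; tmto <= 2; tmto++)
  {
    printf ("tmto=%" PRIu32 ": %" PRIu64 " MiB per candidate\n",
            tmto, scrypt_vram_per_candidate (N, r, tmto) / (1024 * 1024));
  }

  // Prints 256, 128 and 64 MiB. Total device memory scales further with
  // kernel-accel (candidates in flight), which is why accel and tmto are
  // tuned together -- and left untouched on unified-memory (APU) devices,
  // where this memory is shared with the host.
  return 0;
}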
45 lines · 1.9 KiB · C
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#ifndef INC_HASH_SCRYPT_H
#define INC_HASH_SCRYPT_H

// counts are in u32 words: one scrypt block B is 2 * r * 16 u32 = 128 * r bytes

#define GET_SCRYPT_CNT(r,p) (2 * (r) * 16 * (p))
#define GET_SMIX_CNT(r,N)   (2 * (r) * 16 * (N))
#define GET_STATE_CNT(r)    (2 * (r) * 16)

#define SCRYPT_CNT  GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P)
#define SCRYPT_CNT4 (SCRYPT_CNT / 4)
#define STATE_CNT   GET_STATE_CNT (SCRYPT_R)
#define STATE_CNT4  (STATE_CNT / 4)

// linearize a (bid4, lid, y, z) coordinate into a flat buffer index

#define VIDX(bid4,lsz,lid,ySIZE,zSIZE,y,z) (((bid4) * (lsz) * (ySIZE) * (zSIZE)) + ((lid) * (ySIZE) * (zSIZE)) + ((y) * (zSIZE)) + (z))

#if defined IS_CUDA
inline __device__ uint4 operator &  (const uint4 a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  )); }
inline __device__ uint4 operator << (const uint4 a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  )); }
inline __device__ uint4 operator >> (const uint4 a, const u32   b) { return make_uint4 ((a.x >> b  ), (a.y >> b  ), (a.z >> b  ), (a.w >> b  )); }
inline __device__ uint4 operator +  (const uint4 a, const uint4 b) { return make_uint4 ((a.x +  b.x), (a.y +  b.y), (a.z +  b.z), (a.w +  b.w)); }
inline __device__ uint4 operator ^  (const uint4 a, const uint4 b) { return make_uint4 ((a.x ^  b.x), (a.y ^  b.y), (a.z ^  b.z), (a.w ^  b.w)); }
inline __device__ uint4 operator |  (const uint4 a, const uint4 b) { return make_uint4 ((a.x |  b.x), (a.y |  b.y), (a.z |  b.z), (a.w |  b.w)); }
inline __device__ void  operator ^= (      uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }
#endif

#if defined IS_CUDA || defined IS_HIP
// rotate each 32-bit component of a left by n
inline __device__ uint4 rotate (const uint4 a, const int n)
{
  uint4 r;

  r.x = hc_rotl32_S (a.x, n);
  r.y = hc_rotl32_S (a.y, n);
  r.z = hc_rotl32_S (a.z, n);
  r.w = hc_rotl32_S (a.w, n);

  return r;
}
#endif

#endif
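As a quick sanity check on the count macros above, here is a small, standalone expansion for r = 8, p = 1 (host-side C for illustration only; SCRYPT_R and SCRYPT_P are normally injected at kernel build time and are fixed here just for the example).

#include <stdio.h>

#define SCRYPT_R 8
#define SCRYPT_P 1

#define GET_SCRYPT_CNT(r,p) (2 * (r) * 16 * (p))
#define GET_STATE_CNT(r)    (2 * (r) * 16)

#define SCRYPT_CNT  GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P)
#define SCRYPT_CNT4 (SCRYPT_CNT / 4)
#define STATE_CNT   GET_STATE_CNT (SCRYPT_R)
#define STATE_CNT4  (STATE_CNT / 4)

int main (void)
{
  // STATE_CNT is one scrypt B state in u32 words: 2 * 8 * 16 = 256 u32 = 1024 bytes.
  // SCRYPT_CNT covers all p lanes (here p = 1, so also 256).
  // The /4 variants are one quarter of those counts, i.e. the same data
  // measured in uint4 elements rather than u32 words.
  printf ("STATE_CNT   = %d\n", STATE_CNT);   // 256
  printf ("STATE_CNT4  = %d\n", STATE_CNT4);  // 64
  printf ("SCRYPT_CNT  = %d\n", SCRYPT_CNT);  // 256
  printf ("SCRYPT_CNT4 = %d\n", SCRYPT_CNT4); // 64

  return 0;
}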