hashcat/OpenCL/inc_hash_scrypt.h
Jens Steube 4b93a6e93c Add support for detecting unified GPU memory on CUDA and HIP (previously available only for OpenCL and Metal).
Do not adjust kernel-accel or scrypt-tmto for GPUs with unified memory, typically integrated GPUs in CPUs (APUs).
Redesign the "4-buffer" strategy to avoid overallocation from naive division by four, which can significantly increase memory usage for high scrypt configurations (e.g., 256k:8:1).
Update the scrypt B[] access pattern to match the new "4-buffer" design.
Allow user-specified kernel-accel and scrypt-tmto values, individually or both, via command line and tuning database. Any unspecified parameters are adjusted automatically.
Permit user-defined combinations of scrypt-tmto and kernel-accel even when they would exceed available memory.
2025-06-17 13:32:57 +02:00
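
For reference, a hedged example of the manual controls described above (mode 8900 is hashcat's native scrypt mode; the values are illustrative, not recommendations):

    hashcat -m 8900 -n 8 --scrypt-tmto 2 hashes.txt wordlist.txt

Here -n/--kernel-accel pins the accel value and --scrypt-tmto pins the time-memory trade-off; omitting either lets hashcat derive it automatically. The same overrides can also be pinned per device in the tuning database (hashcat.hctune).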

/**
* Author......: See docs/credits.txt
* License.....: MIT
*/

#ifndef INC_HASH_SCRYPT_H
#define INC_HASH_SCRYPT_H

// counts are in 32-bit words: one scrypt block B[i] is 128 * r bytes = 2 * r * 16 u32

#define GET_SCRYPT_CNT(r,p) (2 * (r) * 16 * (p)) // all p blocks of B[]
#define GET_SMIX_CNT(r,N)   (2 * (r) * 16 * (N)) // the N-entry V[] scratchpad
#define GET_STATE_CNT(r)    (2 * (r) * 16)       // a single block (the smix state)

#define SCRYPT_CNT  GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P)
#define SCRYPT_CNT4 (SCRYPT_CNT / 4) // the same count in uint4 elements
#define STATE_CNT   GET_STATE_CNT  (SCRYPT_R)
#define STATE_CNT4  (STATE_CNT / 4)  // the same count in uint4 elements
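
// worked example (illustrative, not from this header): for N:r:p = 16384:8:1,
//   STATE_CNT  = 2 * 8 * 16         =     256 u32 (one 1024-byte block)
//   SCRYPT_CNT = 2 * 8 * 16 * 1     =     256 u32
//   SMIX_CNT   = 2 * 8 * 16 * 16384 = 4194304 u32 = 16 MiB of V[] per instance
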
#define VIDX(bid4,lsz,lid,ySIZE,zSIZE,y,z) (((bid4) * (lsz) * (ySIZE) * (zSIZE)) + ((lid) * (ySIZE) * (zSIZE)) + ((y) * (zSIZE)) + (z))
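// VIDX flattens a [bid4][lid][y][z] coordinate into a linear buffer index; an
// equivalent form of the expansion is (((bid4 * lsz) + lid) * ySIZE + y) * zSIZE + z,
// where lsz/lid are presumably the work-group size and local id, and bid4 selects
// one of the four sub-buffers of the "4-buffer" layout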

#if defined IS_CUDA

// CUDA does not define arithmetic operators for its built-in uint4 vector type
// (OpenCL does), so provide the subset the scrypt kernels rely on

inline __device__ uint4 operator &  (const uint4 a, const u32   b) { return make_uint4 ((a.x &  b  ), (a.y &  b  ), (a.z &  b  ), (a.w &  b  )); }
inline __device__ uint4 operator << (const uint4 a, const u32   b) { return make_uint4 ((a.x << b  ), (a.y << b  ), (a.z << b  ), (a.w << b  )); }
inline __device__ uint4 operator >> (const uint4 a, const u32   b) { return make_uint4 ((a.x >> b  ), (a.y >> b  ), (a.z >> b  ), (a.w >> b  )); }
inline __device__ uint4 operator +  (const uint4 a, const uint4 b) { return make_uint4 ((a.x +  b.x), (a.y +  b.y), (a.z +  b.z), (a.w +  b.w)); }
inline __device__ uint4 operator ^  (const uint4 a, const uint4 b) { return make_uint4 ((a.x ^  b.x), (a.y ^  b.y), (a.z ^  b.z), (a.w ^  b.w)); }
inline __device__ uint4 operator |  (const uint4 a, const uint4 b) { return make_uint4 ((a.x |  b.x), (a.y |  b.y), (a.z |  b.z), (a.w |  b.w)); }
inline __device__ void  operator ^= (      uint4 &a, const uint4 b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; }

#endif

#if defined IS_CUDA || defined IS_HIP

// reimplementation of OpenCL's rotate() built-in for uint4: rotate each lane
// left by n bits using the scalar helper hc_rotl32_S
inline __device__ uint4 rotate (const uint4 a, const int n)
{
  uint4 r;

  r.x = hc_rotl32_S (a.x, n);
  r.y = hc_rotl32_S (a.y, n);
  r.z = hc_rotl32_S (a.z, n);
  r.w = hc_rotl32_S (a.w, n);

  return r;
}
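
// illustrative use (a sketch in the style of scrypt's salsa20/8 rounds, not a
// verbatim excerpt from the kernels): X1 ^= rotate (X0 + X3, 7);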
#endif

#endif // INC_HASH_SCRYPT_H