1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-08-04 12:56:00 +00:00

Merge pull request #4255 from matrix/scrypt_metal

porting the new scrypt engine to Apple Metal
This commit is contained in:
Jens Steube 2025-06-17 22:26:31 +02:00 committed by GitHub
commit e9137e8405
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 49 additions and 27 deletions

View File

@ -126,7 +126,8 @@
#define KERN_ATTR_MAIN_PARAMS \ #define KERN_ATTR_MAIN_PARAMS \
uint hc_gid [[ thread_position_in_grid ]], \ uint hc_gid [[ thread_position_in_grid ]], \
uint hc_lid [[ thread_position_in_threadgroup ]], \ uint hc_lid [[ thread_position_in_threadgroup ]], \
uint hc_lsz [[ threads_per_threadgroup ]] uint hc_lsz [[ threads_per_threadgroup ]], \
uint hc_bid [[ threadgroup_position_in_grid ]]
#endif // IS_METAL #endif // IS_METAL
/* /*

View File

@ -286,18 +286,14 @@ DECLSPEC void salsa_r_p (PRIVATE_AS u32 *TI)
} }
#ifdef IS_HIP #ifdef IS_HIP
DECLSPEC void scrypt_smix_init (LOCAL_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) DECLSPEC void scrypt_smix_init (LOCAL_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#else #else
DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#endif #endif
{ {
const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO; const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4; const u32 zSIZE = STATE_CNT4;
const u64 bid = get_group_id(0);
const u64 lsz = get_local_size(0);
const u64 lid = get_local_id(0);
const u32 xd4 = bid / 4; const u32 xd4 = bid / 4;
const u32 xm4 = bid & 3; const u32 xm4 = bid & 3;
@ -324,18 +320,14 @@ DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL
} }
#ifdef IS_HIP #ifdef IS_HIP
DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, LOCAL_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, LOCAL_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#else #else
DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, PRIVATE_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, PRIVATE_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid)
#endif #endif
{ {
const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO; const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4; const u32 zSIZE = STATE_CNT4;
const u64 bid = get_group_id(0);
const u64 lsz = get_local_size(0);
const u64 lid = get_local_id(0);
const u32 xd4 = bid / 4; const u32 xd4 = bid / 4;
const u32 xm4 = bid & 3; const u32 xm4 = bid & 3;

View File

@ -74,6 +74,7 @@ DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val);
#define get_global_id(param) hc_gid #define get_global_id(param) hc_gid
#define get_local_id(param) hc_lid #define get_local_id(param) hc_lid
#define get_local_size(param) hc_lsz #define get_local_size(param) hc_lsz
#define get_group_id(param) hc_bid
DECLSPEC u32x rotl32 (const u32x a, const int n); DECLSPEC u32x rotl32 (const u32x a, const int n);
DECLSPEC u32x rotr32 (const u32x a, const int n); DECLSPEC u32x rotr32 (const u32x a, const int n);

View File

@ -40,6 +40,8 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -61,7 +63,7 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }
@ -70,6 +72,8 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -93,7 +97,7 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }

View File

@ -176,6 +176,8 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -197,7 +199,7 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }
@ -206,6 +208,8 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -229,7 +233,7 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }

View File

@ -119,6 +119,8 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -140,7 +142,7 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }
@ -149,6 +151,8 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -172,7 +176,7 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }

View File

@ -184,6 +184,8 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -202,7 +204,7 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }
@ -211,6 +213,8 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -234,7 +238,7 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }

View File

@ -69,6 +69,8 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -90,7 +92,7 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }
@ -99,6 +101,8 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -122,7 +126,7 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }

View File

@ -50,6 +50,8 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -71,7 +73,7 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }
@ -80,6 +82,8 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -103,7 +107,7 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }

View File

@ -69,6 +69,8 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -90,7 +92,7 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }
@ -99,6 +101,8 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{ {
const u64 gid = get_global_id (0); const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0); const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
const u64 bid = get_group_id (0);
if (gid >= GID_CNT) return; if (gid >= GID_CNT) return;
@ -122,7 +126,7 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z];
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid);
for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z];
} }