From 564c560dcbdc6ca3d5a13d5a80425ef840f1b8a6 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Tue, 17 Jun 2025 22:17:59 +0200 Subject: [PATCH] porting the new scrypt engine to Apple Metal --- OpenCL/inc_common.h | 3 ++- OpenCL/inc_hash_scrypt.cl | 16 ++++------------ OpenCL/inc_platform.h | 1 + OpenCL/m08900-pure.cl | 8 ++++++-- OpenCL/m15700-pure.cl | 8 ++++++-- OpenCL/m22700-pure.cl | 8 ++++++-- OpenCL/m24000-pure.cl | 8 ++++++-- OpenCL/m27700-pure.cl | 8 ++++++-- OpenCL/m28200-pure.cl | 8 ++++++-- OpenCL/m29800-pure.cl | 8 ++++++-- 10 files changed, 49 insertions(+), 27 deletions(-) diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index 03382886e..0b51fbd29 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -126,7 +126,8 @@ #define KERN_ATTR_MAIN_PARAMS \ uint hc_gid [[ thread_position_in_grid ]], \ uint hc_lid [[ thread_position_in_threadgroup ]], \ - uint hc_lsz [[ threads_per_threadgroup ]] + uint hc_lsz [[ threads_per_threadgroup ]], \ + uint hc_bid [[ threadgroup_position_in_grid ]] #endif // IS_METAL /* diff --git a/OpenCL/inc_hash_scrypt.cl b/OpenCL/inc_hash_scrypt.cl index 6655e2191..05a3464de 100644 --- a/OpenCL/inc_hash_scrypt.cl +++ b/OpenCL/inc_hash_scrypt.cl @@ -286,18 +286,14 @@ DECLSPEC void salsa_r_p (PRIVATE_AS u32 *TI) } #ifdef IS_HIP -DECLSPEC void scrypt_smix_init (LOCAL_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) +DECLSPEC void scrypt_smix_init (LOCAL_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid) #else -DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) +DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid) #endif { const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; - const u64 bid = get_group_id(0); - const u64 lsz = get_local_size(0); - const u64 lid = get_local_id(0); - const u32 xd4 = bid / 4; const u32 xm4 = bid & 3; @@ -324,18 +320,14 @@ DECLSPEC void scrypt_smix_init (PRIVATE_AS uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL } #ifdef IS_HIP -DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, LOCAL_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) +DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, LOCAL_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid) #else -DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, PRIVATE_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid) +DECLSPEC void scrypt_smix_loop (PRIVATE_AS uint4 *X, PRIVATE_AS uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3, const u64 gid, const u64 lid, const u64 lsz, const u64 bid) #endif { const u32 ySIZE = SCRYPT_N >> SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; - const u64 bid = get_group_id(0); - const u64 lsz = get_local_size(0); - const u64 lid = get_local_id(0); - const u32 xd4 = bid / 4; const u32 xm4 = bid & 3; diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h index 1e7805290..cc6489afe 100644 --- a/OpenCL/inc_platform.h +++ b/OpenCL/inc_platform.h @@ -74,6 +74,7 @@ DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); #define get_global_id(param) hc_gid #define get_local_id(param) hc_lid #define get_local_size(param) hc_lsz +#define get_group_id(param) hc_bid DECLSPEC u32x rotl32 (const u32x a, const int n); DECLSPEC u32x rotr32 (const u32x a, const int n); diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl index 69f5624c5..2cd5e1729 100644 --- a/OpenCL/m08900-pure.cl +++ b/OpenCL/m08900-pure.cl @@ -40,6 +40,8 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -61,7 +63,7 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } @@ -70,6 +72,8 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -93,7 +97,7 @@ KERNEL_FQ void HC_ATTR_SEQ m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl index c357e6d97..aa7e497ee 100644 --- a/OpenCL/m15700-pure.cl +++ b/OpenCL/m15700-pure.cl @@ -176,6 +176,8 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -197,7 +199,7 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } @@ -206,6 +208,8 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -229,7 +233,7 @@ KERNEL_FQ void HC_ATTR_SEQ m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl index 2ac914cdd..be6c18cb9 100644 --- a/OpenCL/m22700-pure.cl +++ b/OpenCL/m22700-pure.cl @@ -119,6 +119,8 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -140,7 +142,7 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } @@ -149,6 +151,8 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -172,7 +176,7 @@ KERNEL_FQ void HC_ATTR_SEQ m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } diff --git a/OpenCL/m24000-pure.cl b/OpenCL/m24000-pure.cl index d7d7ed03c..d00e4c139 100644 --- a/OpenCL/m24000-pure.cl +++ b/OpenCL/m24000-pure.cl @@ -184,6 +184,8 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -202,7 +204,7 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } @@ -211,6 +213,8 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -234,7 +238,7 @@ KERNEL_FQ void HC_ATTR_SEQ m24000_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, best for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } diff --git a/OpenCL/m27700-pure.cl b/OpenCL/m27700-pure.cl index 752342015..2bbe3ba01 100644 --- a/OpenCL/m27700-pure.cl +++ b/OpenCL/m27700-pure.cl @@ -69,6 +69,8 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -90,7 +92,7 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } @@ -99,6 +101,8 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -122,7 +126,7 @@ KERNEL_FQ void HC_ATTR_SEQ m27700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } diff --git a/OpenCL/m28200-pure.cl b/OpenCL/m28200-pure.cl index ff34ba8a2..18f0cb161 100644 --- a/OpenCL/m28200-pure.cl +++ b/OpenCL/m28200-pure.cl @@ -50,6 +50,8 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -71,7 +73,7 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop_prepare (KERN_ATTR_TMPS_ESALT (exodus_tmp for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } @@ -80,6 +82,8 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -103,7 +107,7 @@ KERNEL_FQ void HC_ATTR_SEQ m28200_loop (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exod for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } diff --git a/OpenCL/m29800-pure.cl b/OpenCL/m29800-pure.cl index 51b208e3c..fb58a9822 100644 --- a/OpenCL/m29800-pure.cl +++ b/OpenCL/m29800-pure.cl @@ -69,6 +69,8 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -90,7 +92,7 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } @@ -99,6 +101,8 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + const u64 bid = get_group_id (0); if (gid >= GID_CNT) return; @@ -122,7 +126,7 @@ KERNEL_FQ void HC_ATTR_SEQ m29800_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid); + scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf, gid, lid, lsz, bid); for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; }