diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl index 5f96f8ec4..3064263a2 100644 --- a/OpenCL/m08900-pure.cl +++ b/OpenCL/m08900-pure.cl @@ -327,6 +327,9 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); + #elif defined IS_METAL + const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]); + const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]); #else const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]); @@ -357,6 +360,11 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #elif defined IS_METAL + X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w); #else X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); @@ -467,6 +475,11 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #elif defined IS_METAL + T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w); #else T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl index 20cab27d1..e500b4f70 100644 --- a/OpenCL/m15700-pure.cl +++ b/OpenCL/m15700-pure.cl @@ -463,6 +463,9 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); + #elif defined IS_METAL + const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]); + const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]); #else const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]); @@ -493,6 +496,11 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #elif defined IS_METAL + X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w); #else X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); @@ -603,6 +611,11 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #elif defined IS_METAL + T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w); #else T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl index 7d65fd52e..a29df1c03 100644 --- a/OpenCL/m22700-pure.cl +++ b/OpenCL/m22700-pure.cl @@ -400,6 +400,9 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); + #elif defined IS_METAL + const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]); + const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]); #else const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]); @@ -430,6 +433,11 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #elif defined IS_METAL + X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w); #else X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); @@ -605,6 +613,11 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #elif defined IS_METAL + T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w); #else T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); diff --git a/OpenCL/m27700-pure.cl b/OpenCL/m27700-pure.cl index feaca11de..c62dc90d6 100644 --- a/OpenCL/m27700-pure.cl +++ b/OpenCL/m27700-pure.cl @@ -351,6 +351,9 @@ KERNEL_FQ void m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); + #elif defined IS_METAL + const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]); + const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]); #else const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]); @@ -381,6 +384,11 @@ KERNEL_FQ void m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #elif defined IS_METAL + X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w); #else X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); @@ -557,6 +565,11 @@ KERNEL_FQ void m27700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #elif defined IS_METAL + T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w); #else T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); diff --git a/OpenCL/m28200-pure.cl b/OpenCL/m28200-pure.cl index 2ead5b4bc..2260e931b 100644 --- a/OpenCL/m28200-pure.cl +++ b/OpenCL/m28200-pure.cl @@ -337,6 +337,9 @@ KERNEL_FQ void m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t)) #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); + #elif defined IS_METAL + const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]); + const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]); #else const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]); @@ -367,6 +370,11 @@ KERNEL_FQ void m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t)) X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #elif defined IS_METAL + X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w); #else X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); @@ -525,6 +533,11 @@ KERNEL_FQ void m28200_comp (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t)) T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #elif defined IS_METAL + T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w); #else T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); diff --git a/OpenCL/m29800-pure.cl b/OpenCL/m29800-pure.cl index de4260279..c72f5c267 100644 --- a/OpenCL/m29800-pure.cl +++ b/OpenCL/m29800-pure.cl @@ -351,6 +351,9 @@ KERNEL_FQ void m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t)) #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); + #elif defined IS_METAL + const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]); + const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]); #else const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]); @@ -381,6 +384,11 @@ KERNEL_FQ void m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t)) X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #elif defined IS_METAL + X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w); #else X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); @@ -557,6 +565,11 @@ KERNEL_FQ void m29800_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #elif defined IS_METAL + T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w); #else T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); diff --git a/docs/changes.txt b/docs/changes.txt index 90e8a6f0e..c977aaf87 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -70,6 +70,7 @@ - Fixed bug in 29600 module OPTS_TYPE setting - Fixed bug in grep out-of-memory workaround on Unit Test - Fixed bug in input_tokenizer when TOKEN_ATTR_FIXED_LENGTH is used and refactor modules +- Fixed a bug in all SCRYPT-based hash modes with Apple Metal - Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH - Fixed build failed for 4410 with vector width > 1 - Fixed build failed for 10700 optimized with Apple Metal