From 44d58f9a450a7bca44ce6b1e86e2521b25c0c8eb Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 20 May 2023 14:02:25 +0200 Subject: [PATCH] Fixed build failed for 13772 and 13773 with Apple Metal --- OpenCL/m13772-pure.cl | 140 +++++++++++++++++++++----------- OpenCL/m13773-pure.cl | 184 +++++++++++++++++++++++++++++++----------- docs/changes.txt | 1 + 3 files changed, 229 insertions(+), 96 deletions(-) diff --git a/OpenCL/m13772-pure.cl b/OpenCL/m13772-pure.cl index f1fce4e9a..c4b870208 100644 --- a/OpenCL/m13772-pure.cl +++ b/OpenCL/m13772-pure.cl @@ -357,54 +357,98 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, salt_bufs[SALT_POS_HOST].salt_buf, 64); - for (u32 i = 0, j = 1; i < 16; i += 8, j += 1) - { - streebog512_hmac_ctx_t streebog512_hmac_ctx2 = streebog512_hmac_ctx; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - streebog512_hmac_update_64 (&streebog512_hmac_ctx2, w0, w1, w2, w3, 4); - - streebog512_hmac_final (&streebog512_hmac_ctx2); - - tmps[gid].dgst[i + 0] = streebog512_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = streebog512_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = streebog512_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = streebog512_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = streebog512_hmac_ctx2.opad.h[4]; - tmps[gid].dgst[i + 5] = streebog512_hmac_ctx2.opad.h[5]; - tmps[gid].dgst[i + 6] = streebog512_hmac_ctx2.opad.h[6]; - tmps[gid].dgst[i + 7] = streebog512_hmac_ctx2.opad.h[7]; - - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; - tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; - tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; - } + u32 i = 0; + u32 j = 1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + streebog512_hmac_ctx_t streebog512_hmac_ctx_v1 = streebog512_hmac_ctx; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + streebog512_hmac_update_64 (&streebog512_hmac_ctx_v1, w0, w1, w2, w3, 4); + + streebog512_hmac_final (&streebog512_hmac_ctx_v1); + + tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v1.opad.h[0]; + tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v1.opad.h[1]; + tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v1.opad.h[2]; + tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v1.opad.h[3]; + tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v1.opad.h[4]; + tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v1.opad.h[5]; + tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v1.opad.h[6]; + tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v1.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + + i=8; + j=2; + + streebog512_hmac_ctx_t streebog512_hmac_ctx_v2 = streebog512_hmac_ctx; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + streebog512_hmac_update_64 (&streebog512_hmac_ctx_v2, w0, w1, w2, w3, 4); + + streebog512_hmac_final (&streebog512_hmac_ctx_v2); + + tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v2.opad.h[0]; + tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v2.opad.h[1]; + tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v2.opad.h[2]; + tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v2.opad.h[3]; + tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v2.opad.h[4]; + tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v2.opad.h[5]; + tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v2.opad.h[6]; + tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; } KERNEL_FQ void m13772_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) diff --git a/OpenCL/m13773-pure.cl b/OpenCL/m13773-pure.cl index 3551d5560..55f14890c 100644 --- a/OpenCL/m13773-pure.cl +++ b/OpenCL/m13773-pure.cl @@ -422,54 +422,142 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, salt_bufs[SALT_POS_HOST].salt_buf, 64); - for (u32 i = 0, j = 1; i < 24; i += 8, j += 1) - { - streebog512_hmac_ctx_t streebog512_hmac_ctx2 = streebog512_hmac_ctx; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - streebog512_hmac_update_64 (&streebog512_hmac_ctx2, w0, w1, w2, w3, 4); - - streebog512_hmac_final (&streebog512_hmac_ctx2); - - tmps[gid].dgst[i + 0] = streebog512_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = streebog512_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = streebog512_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = streebog512_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = streebog512_hmac_ctx2.opad.h[4]; - tmps[gid].dgst[i + 5] = streebog512_hmac_ctx2.opad.h[5]; - tmps[gid].dgst[i + 6] = streebog512_hmac_ctx2.opad.h[6]; - tmps[gid].dgst[i + 7] = streebog512_hmac_ctx2.opad.h[7]; - - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; - tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; - tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; - } + u32 i = 0; + u32 j = 1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + streebog512_hmac_ctx_t streebog512_hmac_ctx_v1 = streebog512_hmac_ctx; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + streebog512_hmac_update_64 (&streebog512_hmac_ctx_v1, w0, w1, w2, w3, 4); + + streebog512_hmac_final (&streebog512_hmac_ctx_v1); + + tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v1.opad.h[0]; + tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v1.opad.h[1]; + tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v1.opad.h[2]; + tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v1.opad.h[3]; + tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v1.opad.h[4]; + tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v1.opad.h[5]; + tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v1.opad.h[6]; + tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v1.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + + i = 8; + j = 2; + + streebog512_hmac_ctx_t streebog512_hmac_ctx_v2 = streebog512_hmac_ctx; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + streebog512_hmac_update_64 (&streebog512_hmac_ctx_v2, w0, w1, w2, w3, 4); + + streebog512_hmac_final (&streebog512_hmac_ctx_v2); + + tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v2.opad.h[0]; + tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v2.opad.h[1]; + tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v2.opad.h[2]; + tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v2.opad.h[3]; + tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v2.opad.h[4]; + tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v2.opad.h[5]; + tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v2.opad.h[6]; + tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + + i = 16; + j = 3; + + streebog512_hmac_ctx_t streebog512_hmac_ctx_v3 = streebog512_hmac_ctx; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + streebog512_hmac_update_64 (&streebog512_hmac_ctx_v3, w0, w1, w2, w3, 4); + + streebog512_hmac_final (&streebog512_hmac_ctx_v3); + + tmps[gid].dgst[i + 0] = streebog512_hmac_ctx_v3.opad.h[0]; + tmps[gid].dgst[i + 1] = streebog512_hmac_ctx_v3.opad.h[1]; + tmps[gid].dgst[i + 2] = streebog512_hmac_ctx_v3.opad.h[2]; + tmps[gid].dgst[i + 3] = streebog512_hmac_ctx_v3.opad.h[3]; + tmps[gid].dgst[i + 4] = streebog512_hmac_ctx_v3.opad.h[4]; + tmps[gid].dgst[i + 5] = streebog512_hmac_ctx_v3.opad.h[5]; + tmps[gid].dgst[i + 6] = streebog512_hmac_ctx_v3.opad.h[6]; + tmps[gid].dgst[i + 7] = streebog512_hmac_ctx_v3.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; } KERNEL_FQ void m13773_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) diff --git a/docs/changes.txt b/docs/changes.txt index 9dbe1a5f1..e96a0836b 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -60,6 +60,7 @@ - Fixed bug in input_tokenizer when TOKEN_ATTR_FIXED_LENGTH is used and refactor modules - Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH - Fixed build failed for 4410 with vector width > 1 +- Fixed build failed for 13772 and 13773 with Apple Metal - Fixed build failed for 18400 with Apple Metal - Fixed build failed for 18600 with Apple Metal - Fixed build failed for 31700 with Apple Metal