From 074411418fa43e8f69a003419e5d84c9d8dcdd93 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sat, 18 Mar 2023 23:16:01 +0000 Subject: [PATCH] Fix -a 3 for -m 31400 in vector datatype mode --- OpenCL/m31400_a0-pure.cl | 244 ++++++----- OpenCL/m31400_a1-pure.cl | 135 ++++--- OpenCL/m31400_a3-pure.cl | 808 +++++++++++++++---------------------- docs/changes.txt | 1 + src/modules/module_31400.c | 4 +- 5 files changed, 517 insertions(+), 675 deletions(-) diff --git a/OpenCL/m31400_a0-pure.cl b/OpenCL/m31400_a0-pure.cl index cd11beb97..082b52f4b 100644 --- a/OpenCL/m31400_a0-pure.cl +++ b/OpenCL/m31400_a0-pure.cl @@ -22,33 +22,33 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { -const int offset_switch = offset / 4; + const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { - case 0: + case 0: w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_be_S ( 0, w0[0], offset); break; - case 1: + case 1: w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_be_S ( 0, w0[0], offset); w0[0] = 0; break; - case 2: + case 2: w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_be_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; - case 3: + case 3: w0[3] = hc_bytealign_be_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; @@ -61,7 +61,7 @@ const int offset_switch = offset / 4; w0[1] = 0; w0[0] = 0; break; - } + } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV @@ -71,33 +71,33 @@ const int offset_switch = offset / 4; #endif #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { - case 0: + case 0: w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); w0[0] = hc_byte_perm_S (w0[0], 0, selector); break; - case 1: + case 1: w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); w0[1] = hc_byte_perm_S (w0[0], 0, selector); w0[0] = 0; break; - case 2: + case 2: w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); w0[2] = hc_byte_perm_S (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; - case 3: + case 3: w0[3] = hc_byte_perm_S (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; @@ -114,115 +114,113 @@ const int offset_switch = offset / 4; #endif } - -DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const int pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { - AES256_set_encrypt_key(aes_ks, hash, s_te0, s_te1, s_te2, s_te3); + AES256_set_encrypt_key (aes_ks, hash, s_te0, s_te1, s_te2, s_te3); - shift_buffer_by_offset(hash,pw_len+4); + shift_buffer_by_offset (hash, pw_len + 4); - hash[0] = hc_swap32_S(pw_len); - hash[1] |= hc_swap32_S(pw[0]); - hash[2] |= hc_swap32_S(pw[1]); - hash[3] |= 
hc_swap32_S(pw[2]); - - AES256_encrypt (aes_ks, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4); + hash[0] = hc_swap32_S (pw_len); + hash[1] |= hc_swap32_S (pw[0]); + hash[2] |= hc_swap32_S (pw[1]); + hash[3] |= hc_swap32_S (pw[2]); + + AES256_encrypt (aes_ks, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4); } KERNEL_FQ void m31400_mxx (KERN_ATTR_RULES ()) { - /** - * modifier - */ + /** + * modifier + */ - const u64 lid = get_local_id (0); - const u64 gid = get_global_id (0); - const u64 lsz = get_local_size (0); + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); - /** - * aes shared - */ + /** + * aes shared + */ -#ifdef REAL_SHM + #ifdef REAL_SHM - LOCAL_VK u32 s_te0[256]; - LOCAL_VK u32 s_te1[256]; - LOCAL_VK u32 s_te2[256]; - LOCAL_VK u32 s_te3[256]; - LOCAL_VK u32 s_te4[256]; + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) - { - s_te0[i] = te0[i]; - s_te1[i] = te1[i]; - s_te2[i] = te2[i]; - s_te3[i] = te3[i]; - s_te4[i] = te4[i]; - } + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } - SYNC_THREADS(); + SYNC_THREADS(); -#else + #else - CONSTANT_AS u32a *s_te0 = te0; - CONSTANT_AS u32a *s_te1 = te1; - CONSTANT_AS u32a *s_te2 = te2; - CONSTANT_AS u32a *s_te3 = te3; - CONSTANT_AS u32a *s_te4 = te4; + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; -#endif + #endif + + if (gid >= GID_CNT) return; - if (gid >= GID_CNT) return; + /** + * base + */ + + COPY_PW (pws[gid]); - /** - * base - */ + u32 ks[60]; - COPY_PW (pws[gid]); + /** + * loop + */ - /** - * loop - */ + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; - u32 ks[60]; + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); - for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) - { - pw_t tmp = PASTE_PW; + sha256_ctx_t ctx; - tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + sha256_init (&ctx); - sha256_ctx_t ctx; + sha256_update_swap (&ctx, tmp.i, tmp.pw_len); - sha256_init (&ctx); + sha256_final (&ctx); - sha256_update_swap (&ctx, tmp.i, tmp.pw_len); + u32 out[4] = { 0 }; - sha256_final (&ctx); - - u32 out[4]={0}; + aes256_scrt_format (ks, tmp.i, tmp.pw_len, ctx.h, out,s_te0, s_te1, s_te2, s_te3, s_te4); - aes256_scrt_format(ks,tmp.i,tmp.pw_len,ctx.h,out,s_te0, s_te1, s_te2, s_te3, s_te4); - - const u32 r0 = out[DGST_R0]; - const u32 r1 = out[DGST_R1]; - const u32 r2 = out[DGST_R2]; - const u32 r3 = out[DGST_R3]; + const u32 r0 = out[DGST_R0]; + const u32 r1 = out[DGST_R1]; + const u32 r2 = out[DGST_R2]; + const u32 r3 = out[DGST_R3]; - COMPARE_M_SCALAR (r0, r1, r2, r3); - } + COMPARE_M_SCALAR (r0, r1, r2, r3); + } } KERNEL_FQ void m31400_sxx (KERN_ATTR_RULES ()) { + /** + * modifier + */ - /** - * modifier - */ - - const u64 lid = get_local_id (0); - const u64 gid = get_global_id (0); - const u64 lsz = get_local_size (0); + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); /** * aes shared @@ -245,7 +243,7 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_RULES ()) s_te4[i] = te4[i]; } - SYNC_THREADS (); + SYNC_THREADS(); #else @@ -257,55 +255,55 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_RULES ()) #endif - if (gid >= GID_CNT) 
return; + if (gid >= GID_CNT) return; + + /** + * digest + */ - /** - * digest - */ + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; - const u32 search[4] = - { - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] - }; + /** + * base + */ - /** - * base - */ + COPY_PW (pws[gid]); - COPY_PW (pws[gid]); + u32 ks[60]; - u32 ks[60]; - - /** - * loop - */ + /** + * loop + */ - for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) - { - pw_t tmp = PASTE_PW; + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; - tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); - sha256_ctx_t ctx; + sha256_ctx_t ctx; - sha256_init (&ctx); + sha256_init (&ctx); - sha256_update_swap (&ctx, tmp.i, tmp.pw_len); + sha256_update_swap (&ctx, tmp.i, tmp.pw_len); - sha256_final (&ctx); + sha256_final (&ctx); - u32 out[4]={0}; + u32 out[4] = { 0 }; - aes256_scrt_format(ks,tmp.i,tmp.pw_len,ctx.h,out,s_te0, s_te1, s_te2, s_te3, s_te4); + aes256_scrt_format (ks, tmp.i, tmp.pw_len, ctx.h, out,s_te0, s_te1, s_te2, s_te3, s_te4); - const u32 r0 = out[DGST_R0]; - const u32 r1 = out[DGST_R1]; - const u32 r2 = out[DGST_R2]; - const u32 r3 = out[DGST_R3]; + const u32 r0 = out[DGST_R0]; + const u32 r1 = out[DGST_R1]; + const u32 r2 = out[DGST_R2]; + const u32 r3 = out[DGST_R3]; - COMPARE_S_SCALAR (r0, r1, r2, r3); - } + COMPARE_S_SCALAR (r0, r1, r2, r3); + } } diff --git a/OpenCL/m31400_a1-pure.cl b/OpenCL/m31400_a1-pure.cl index 04cef3256..b3a630130 100644 --- a/OpenCL/m31400_a1-pure.cl +++ b/OpenCL/m31400_a1-pure.cl @@ -22,33 +22,33 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { -const int offset_switch = offset / 4; + const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { - case 0: + case 0: w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_be_S ( 0, w0[0], offset); break; - case 1: + case 1: w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_be_S ( 0, w0[0], offset); w0[0] = 0; break; - case 2: + case 2: w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_be_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; - case 3: + case 3: w0[3] = hc_bytealign_be_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; @@ -61,7 +61,7 @@ const int offset_switch = offset / 4; w0[1] = 0; w0[0] = 0; break; - } + } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV @@ -71,33 +71,33 @@ const int offset_switch = offset / 4; #endif #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { - case 0: + case 0: w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); w0[1] = hc_byte_perm_S (w0[1], w0[0], 
selector); w0[0] = hc_byte_perm_S (w0[0], 0, selector); break; - case 1: + case 1: w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); w0[1] = hc_byte_perm_S (w0[0], 0, selector); w0[0] = 0; break; - case 2: + case 2: w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); w0[2] = hc_byte_perm_S (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; - case 3: + case 3: w0[3] = hc_byte_perm_S (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; @@ -114,19 +114,18 @@ const int offset_switch = offset / 4; #endif } - -DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const int pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { - AES256_set_encrypt_key(aes_ks, hash, s_te0, s_te1, s_te2, s_te3); + AES256_set_encrypt_key (aes_ks, hash, s_te0, s_te1, s_te2, s_te3); + + shift_buffer_by_offset (hash, pw_len + 4); - shift_buffer_by_offset(hash,pw_len+4); + hash[0] = hc_swap32_S (pw_len); + hash[1] |= hc_swap32_S (pw[0]); + hash[2] |= hc_swap32_S (pw[1]); + hash[3] |= hc_swap32_S (pw[2]); - hash[0] = hc_swap32_S(pw_len); - hash[1] |= hc_swap32_S(pw[0]); - hash[2] |= hc_swap32_S(pw[1]); - hash[3] |= hc_swap32_S(pw[2]); - - AES256_encrypt (aes_ks, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4); + AES256_encrypt (aes_ks, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4); } KERNEL_FQ void m31400_mxx (KERN_ATTR_BASIC ()) @@ -137,40 +136,40 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_BASIC ()) const u64 lid = get_local_id (0); const u64 gid = get_global_id (0); - const u64 lsz = get_local_size (0); + const u64 lsz = get_local_size (0); - /** - * aes shared - */ + /** + * aes shared + */ -#ifdef REAL_SHM + #ifdef REAL_SHM - LOCAL_VK u32 s_te0[256]; - LOCAL_VK u32 s_te1[256]; - LOCAL_VK u32 s_te2[256]; - LOCAL_VK u32 s_te3[256]; - LOCAL_VK u32 s_te4[256]; + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) - { - s_te0[i] = te0[i]; - s_te1[i] = te1[i]; - s_te2[i] = te2[i]; - s_te3[i] = te3[i]; - s_te4[i] = te4[i]; - } + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } - SYNC_THREADS(); + SYNC_THREADS(); -#else + #else - CONSTANT_AS u32a *s_te0 = te0; - CONSTANT_AS u32a *s_te1 = te1; - CONSTANT_AS u32a *s_te2 = te2; - CONSTANT_AS u32a *s_te3 = te3; - CONSTANT_AS u32a *s_te4 = te4; + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; -#endif + #endif if (gid >= GID_CNT) return; @@ -184,12 +183,12 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_BASIC ()) sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + u32 ks[60]; + /** * loop */ - u32 ks[60]; - for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) { sha256_ctx_t ctx = ctx0; @@ -197,21 +196,23 @@ KERNEL_FQ void m31400_mxx (KERN_ATTR_BASIC ()) sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); u32 pw_candidate[3]; - pw_candidate[0]= hc_swap32_S(ctx.w0[0]); - pw_candidate[1]= hc_swap32_S(ctx.w0[1]); - 
pw_candidate[2]= hc_swap32_S(ctx.w0[2]); + + pw_candidate[0] = hc_swap32_S (ctx.w0[0]); + pw_candidate[1] = hc_swap32_S (ctx.w0[1]); + pw_candidate[2] = hc_swap32_S (ctx.w0[2]); + u32 pw_len=ctx.len; sha256_final (&ctx); - u32 out[4]={0}; + u32 out[4] = { 0 }; - aes256_scrt_format(ks,pw_candidate,pw_len,ctx.h,out,s_te0, s_te1, s_te2, s_te3, s_te4); + aes256_scrt_format (ks, pw_candidate, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4); - const u32 r0 = out[DGST_R0]; - const u32 r1 = out[DGST_R1]; - const u32 r2 = out[DGST_R2]; - const u32 r3 = out[DGST_R3]; + const u32 r0 = out[DGST_R0]; + const u32 r1 = out[DGST_R1]; + const u32 r2 = out[DGST_R2]; + const u32 r3 = out[DGST_R3]; COMPARE_M_SCALAR (r0, r1, r2, r3); } @@ -225,7 +226,7 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_BASIC ()) const u64 lid = get_local_id (0); const u64 gid = get_global_id (0); - const u64 lsz = get_local_size (0); + const u64 lsz = get_local_size (0); /** * aes shared @@ -248,7 +249,7 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_BASIC ()) s_te4[i] = te4[i]; } - SYNC_THREADS (); + SYNC_THREADS(); #else @@ -295,18 +296,20 @@ KERNEL_FQ void m31400_sxx (KERN_ATTR_BASIC ()) sha256_ctx_t ctx = ctx0; sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); - + u32 pw_candidate[3]; - pw_candidate[0]=hc_swap32_S(ctx.w0[0]); - pw_candidate[1]=hc_swap32_S(ctx.w0[1]); - pw_candidate[2]=hc_swap32_S(ctx.w0[2]); + + pw_candidate[0] = hc_swap32_S (ctx.w0[0]); + pw_candidate[1] = hc_swap32_S (ctx.w0[1]); + pw_candidate[2] = hc_swap32_S (ctx.w0[2]); + u32 pw_len=ctx.len; sha256_final (&ctx); - u32 out[4]={0}; + u32 out[4] = { 0 }; - aes256_scrt_format(ks,pw_candidate,pw_len,ctx.h,out,s_te0, s_te1, s_te2, s_te3, s_te4); + aes256_scrt_format (ks, pw_candidate, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4); const u32 r0 = out[DGST_R0]; const u32 r1 = out[DGST_R1]; diff --git a/OpenCL/m31400_a3-pure.cl b/OpenCL/m31400_a3-pure.cl index ce35acda2..03f66455c 100644 --- a/OpenCL/m31400_a3-pure.cl +++ b/OpenCL/m31400_a3-pure.cl @@ -1,7 +1,7 @@ /** - * Author......: See docs/credits.txt - * License.....: MIT - */ +* Author......: See docs/credits.txt +* License.....: MIT +*/ #define NEW_SIMD_CODE @@ -20,642 +20,482 @@ #include M2S(INCLUDE_PATH/inc_cipher_aes.cl) #endif -DECLSPEC void shift_buffer_by_offset(PRIVATE_AS u32 *w0, const u32 offset) +DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { - const int offset_switch = offset / 4; + const int offset_switch = offset / 4; -#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - switch (offset_switch) - { + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC + switch (offset_switch) + { case 0: - w0[3] = hc_bytealign_be_S(w0[2], w0[3], offset); - w0[2] = hc_bytealign_be_S(w0[1], w0[2], offset); - w0[1] = hc_bytealign_be_S(w0[0], w0[1], offset); - w0[0] = hc_bytealign_be_S(0, w0[0], offset); - break; + w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset); + w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset); + w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset); + w0[0] = hc_bytealign_be_S ( 0, w0[0], offset); + break; case 1: - w0[3] = hc_bytealign_be_S(w0[1], w0[2], offset); - w0[2] = hc_bytealign_be_S(w0[0], w0[1], offset); - w0[1] = hc_bytealign_be_S(0, w0[0], offset); - w0[0] = 0; - break; + w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset); + w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset); + w0[1] = hc_bytealign_be_S ( 0, w0[0], offset); + w0[0] = 0; + break; case 2: - w0[3] = 
hc_bytealign_be_S(w0[0], w0[1], offset); - w0[2] = hc_bytealign_be_S(0, w0[0], offset); - w0[1] = 0; - w0[0] = 0; - break; + w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset); + w0[2] = hc_bytealign_be_S ( 0, w0[0], offset); + w0[1] = 0; + w0[0] = 0; + break; case 3: - w0[3] = hc_bytealign_be_S(0, w0[0], offset); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; + w0[3] = hc_bytealign_be_S ( 0, w0[0], offset); + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } -#endif - -#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - -#if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; -#endif - -#if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); -#endif - - switch (offset_switch) - { + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + } + #endif + + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV + const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); + #endif + + switch (offset_switch) + { case 0: - w0[3] = hc_byte_perm_S(w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S(w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S(w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S(w0[0], 0, selector); - break; + w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); + w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); + w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); + w0[0] = hc_byte_perm_S (w0[0], 0, selector); + break; case 1: - w0[3] = hc_byte_perm_S(w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S(w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S(w0[0], 0, selector); - w0[0] = 0; - break; + w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); + w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); + w0[1] = hc_byte_perm_S (w0[0], 0, selector); + w0[0] = 0; + break; case 2: - w0[3] = hc_byte_perm_S(w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S(w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; + w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); + w0[2] = hc_byte_perm_S (w0[0], 0, selector); + w0[1] = 0; + w0[0] = 0; + break; case 3: - w0[3] = hc_byte_perm_S(w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; + w0[3] = hc_byte_perm_S (w0[0], 0, selector); + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } -#endif + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + } + #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { - AES256_set_encrypt_key(aes_ks, hash, s_te0, s_te1, s_te2, s_te3); + AES256_set_encrypt_key (aes_ks, hash, s_te0, s_te1, s_te2, s_te3); + + shift_buffer_by_offset (hash, pw_len + 4); - shift_buffer_by_offset(hash, pw_len + 4); + hash[0] = hc_swap32_S (pw_len); + hash[1] |= hc_swap32_S (pw[0]); + hash[2] |= hc_swap32_S (pw[1]); + hash[3] |= hc_swap32_S (pw[2]); - hash[0] = hc_swap32_S(pw_len); - hash[1] |= hc_swap32_S(pw[0]); - hash[2] |= hc_swap32_S(pw[1]); - hash[3] |= hc_swap32_S(pw[2]); - - AES256_encrypt(aes_ks, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4); + AES256_encrypt (aes_ks, hash, out, s_te0, 
s_te1, s_te2, s_te3, s_te4);
 }
 
-DECLSPEC void aes256_scrt_format_VV(PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w, const u32 pw_len, PRIVATE_AS u32x *hash, PRIVATE_AS u32x *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
+DECLSPEC void aes256_scrt_format_VV (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32x *w, const u32 pw_len, PRIVATE_AS u32x *h, PRIVATE_AS u32x *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
 {
-#if VECT_SIZE == 1
- aes256_scrt_format(aes_ks, w, pw_len, hash, out, s_te0, s_te1, s_te2, s_te3, s_te4);
-#endif
+ #if VECT_SIZE == 1
+ aes256_scrt_format (aes_ks, w, pw_len, h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
+ #endif
 
-#if VECT_SIZE >= 2
+ #if VECT_SIZE >= 2
+ u32 tmp_w[64];
+ u32 tmp_h[8];
+ u32 tmp_out[4];
 
- u32 tmp_w[4];
- u32 tmp_hash[4];
- u32 tmp_out[4];
+ //s0
 
- //s0
- tmp_w[0] = w[0].s0;
- tmp_w[1] = w[1].s0;
- tmp_w[2] = w[2].s0;
- tmp_w[3] = w[3].s0;
+ for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s0;
+ for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s0;
 
- tmp_hash[0] = hash[0].s0;
- tmp_hash[1] = hash[1].s0;
- tmp_hash[2] = hash[2].s0;
- tmp_hash[3] = hash[3].s0;
+ aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
 
-
- aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
+ for (u32 i = 0; i < 4; i++) out[i].s0 = tmp_out[i];
 
- out[0].s0 = tmp_out[0];
- out[1].s0 = tmp_out[1];
- out[2].s0 = tmp_out[2];
- out[3].s0 = tmp_out[3];
+ //s1
+ for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s1;
+ for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s1;
 
- //s1
- tmp_w[0] = w[0].s1;
- tmp_w[1] = w[1].s1;
- tmp_w[2] = w[2].s1;
- tmp_w[3] = w[3].s1;
+ aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
 
- tmp_hash[0] = hash[0].s1;
- tmp_hash[1] = hash[1].s1;
- tmp_hash[2] = hash[2].s1;
- tmp_hash[3] = hash[3].s1;
+ for (u32 i = 0; i < 4; i++) out[i].s1 = tmp_out[i];
 
- 
- aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
+ #endif
 
- out[0].s1 = tmp_out[0];
- out[1].s1 = tmp_out[1];
- out[2].s1 = tmp_out[2];
- out[3].s1 = tmp_out[3];
+ #if VECT_SIZE >= 4
+ //s2
 
-#endif
+ for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s2;
+ for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s2;
 
-#if VECT_SIZE >= 4
- //s2
- tmp_w[0] = w[0].s2;
- tmp_w[1] = w[1].s2;
- tmp_w[2] = w[2].s2;
- tmp_w[3] = w[3].s2;
+ aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
 
- tmp_hash[0] = hash[0].s2;
- tmp_hash[1] = hash[1].s2;
- tmp_hash[2] = hash[2].s2;
- tmp_hash[3] = hash[3].s2;
+ for (u32 i = 0; i < 4; i++) out[i].s2 = tmp_out[i];
 
- aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
+ //s3
 
- out[0].s2 = tmp_out[0];
- out[1].s2 = tmp_out[1];
- out[2].s2 = tmp_out[2];
- out[3].s2 = tmp_out[3];
+ for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s3;
+ for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s3;
 
+ aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
 
- //s3
- tmp_w[0] = w[0].s3;
- tmp_w[1] = w[1].s3;
- tmp_w[2] = w[2].s3;
- tmp_w[3] = w[3].s3;
+ for (u32 i = 0; i < 4; i++) out[i].s3 = tmp_out[i];
 
- tmp_hash[0] = hash[0].s3;
- tmp_hash[1] = hash[1].s3;
- tmp_hash[2] = hash[2].s3;
- tmp_hash[3] = hash[3].s3;
- 
- aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4);
+ #endif
 
- 
out[0].s3 = tmp_out[0]; - out[1].s3 = tmp_out[1]; - out[2].s3 = tmp_out[2]; - out[3].s3 = tmp_out[3]; + #if VECT_SIZE >= 8 + //s4 -#endif + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s4; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s4; -#if VECT_SIZE >= 8 - //s4 - tmp_w[0] = w[0].s4; - tmp_w[1] = w[1].s4; - tmp_w[2] = w[2].s4; - tmp_w[3] = w[3].s4; - - tmp_hash[0] = hash[0].s4; - tmp_hash[1] = hash[1].s4; - tmp_hash[2] = hash[2].s4; - tmp_hash[3] = hash[3].s4; - - - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - - out[0].s4 = tmp_out[0]; - out[1].s4 = tmp_out[1]; - out[2].s4 = tmp_out[2]; - out[3].s4 = tmp_out[3]; - - //s5 - tmp_w[0] = w[0].s5; - tmp_w[1] = w[1].s5; - tmp_w[2] = w[2].s5; - tmp_w[3] = w[3].s5; - - tmp_hash[0] = hash[0].s5; - tmp_hash[1] = hash[1].s5; - tmp_hash[2] = hash[2].s5; - tmp_hash[3] = hash[3].s5; - - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - - out[0].s5 = tmp_out[0]; - out[1].s5 = tmp_out[1]; - out[2].s5 = tmp_out[2]; - out[3].s5 = tmp_out[3]; - - - //s6 - tmp_w[0] = w[0].s6; - tmp_w[1] = w[1].s6; - tmp_w[2] = w[2].s6; - tmp_w[3] = w[3].s6; - - tmp_hash[0] = hash[0].s6; - tmp_hash[1] = hash[1].s6; - tmp_hash[2] = hash[2].s6; - tmp_hash[3] = hash[3].s6; - - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - - out[0].s6 = tmp_out[0]; - out[1].s6 = tmp_out[1]; - out[2].s6 = tmp_out[2]; - out[3].s6 = tmp_out[3]; - - - //s7 - tmp_w[0] = w[0].s7; - tmp_w[1] = w[1].s7; - tmp_w[2] = w[2].s7; - tmp_w[3] = w[3].s7; - - tmp_hash[0] = hash[0].s7; - tmp_hash[1] = hash[1].s7; - tmp_hash[2] = hash[2].s7; - tmp_hash[3] = hash[3].s7; - - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - - out[0].s7 = tmp_out[0]; - out[1].s7 = tmp_out[1]; - out[2].s7 = tmp_out[2]; - out[3].s7 = tmp_out[3]; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); -#endif + for (u32 i = 0; i < 4; i++) out[i].s4 = tmp_out[i]; -#if VECT_SIZE >= 16 + //s5 - //s8 - tmp_w[0] = w[0].s8; - tmp_w[1] = w[1].s8; - tmp_w[2] = w[2].s8; - tmp_w[3] = w[3].s8; + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s5; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s5; - tmp_hash[0] = hash[0].s8; - tmp_hash[1] = hash[1].s8; - tmp_hash[2] = hash[2].s8; - tmp_hash[3] = hash[3].s8; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + for (u32 i = 0; i < 4; i++) out[i].s5 = tmp_out[i]; - out[0].s8 = tmp_out[0]; - out[1].s8 = tmp_out[1]; - out[2].s8 = tmp_out[2]; - out[3].s8 = tmp_out[3]; + //s6 + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s6; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s6; - //s9 - tmp_w[0] = w[0].s9; - tmp_w[1] = w[1].s9; - tmp_w[2] = w[2].s9; - tmp_w[3] = w[3].s9; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - tmp_hash[0] = hash[0].s9; - tmp_hash[1] = hash[1].s9; - tmp_hash[2] = hash[2].s9; - tmp_hash[3] = hash[3].s9; - - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + for (u32 i = 0; i < 4; i++) out[i].s6 = tmp_out[i]; - out[0].s9 = tmp_out[0]; - out[1].s9 = tmp_out[1]; - out[2].s9 = tmp_out[2]; - out[3].s9 = tmp_out[3]; + //s7 + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s7; + for (u32 i = 0; i < 8; i++) tmp_h[i] = 
h[i].s7; - //s10 - tmp_w[0] = w[0].sa; - tmp_w[1] = w[1].sa; - tmp_w[2] = w[2].sa; - tmp_w[3] = w[3].sa; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - tmp_hash[0] = hash[0].sa; - tmp_hash[1] = hash[1].sa; - tmp_hash[2] = hash[2].sa; - tmp_hash[3] = hash[3].sa; + for (u32 i = 0; i < 4; i++) out[i].s7 = tmp_out[i]; - - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + #endif - out[0].sa = tmp_out[0]; - out[1].sa = tmp_out[1]; - out[2].sa = tmp_out[2]; - out[3].sa = tmp_out[3]; + #if VECT_SIZE >= 16 + //s8 - //s11 - tmp_w[0] = w[0].sb; - tmp_w[1] = w[1].sb; - tmp_w[2] = w[2].sb; - tmp_w[3] = w[3].sb; + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s8; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s8; - tmp_hash[0] = hash[0].sb; - tmp_hash[1] = hash[1].sb; - tmp_hash[2] = hash[2].sb; - tmp_hash[3] = hash[3].sb; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + for (u32 i = 0; i < 4; i++) out[i].s8 = tmp_out[i]; - out[0].sb = tmp_out[0]; - out[1].sb = tmp_out[1]; - out[2].sb = tmp_out[2]; - out[3].sb = tmp_out[3]; + //s9 + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].s9; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].s9; - //s12 - tmp_w[0] = w[0].sc; - tmp_w[1] = w[1].sc; - tmp_w[2] = w[2].sc; - tmp_w[3] = w[3].sc; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - tmp_hash[0] = hash[0].sc; - tmp_hash[1] = hash[1].sc; - tmp_hash[2] = hash[2].sc; - tmp_hash[3] = hash[3].sc; + for (u32 i = 0; i < 4; i++) out[i].s9 = tmp_out[i]; - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + //sa - out[0].sc = tmp_out[0]; - out[1].sc = tmp_out[1]; - out[2].sc = tmp_out[2]; - out[3].sc = tmp_out[3]; + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sa; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sa; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - //s13 - tmp_w[0] = w[0].sd; - tmp_w[1] = w[1].sd; - tmp_w[2] = w[2].sd; - tmp_w[3] = w[3].sd; + for (u32 i = 0; i < 4; i++) out[i].sa = tmp_out[i]; - tmp_hash[0] = hash[0].sd; - tmp_hash[1] = hash[1].sd; - tmp_hash[2] = hash[2].sd; - tmp_hash[3] = hash[3].sd; + //sb - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sb; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sb; - out[0].sd = tmp_out[0]; - out[1].sd = tmp_out[1]; - out[2].sd = tmp_out[2]; - out[3].sd = tmp_out[3]; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - //s14 - tmp_w[0] = w[0].se; - tmp_w[1] = w[1].se; - tmp_w[2] = w[2].se; - tmp_w[3] = w[3].se; + for (u32 i = 0; i < 4; i++) out[i].sb = tmp_out[i]; - tmp_hash[0] = hash[0].se; - tmp_hash[1] = hash[1].se; - tmp_hash[2] = hash[2].se; - tmp_hash[3] = hash[3].se; + //sc - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sc; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sc; - out[0].se = tmp_out[0]; - out[1].se = tmp_out[1]; - out[2].se = tmp_out[2]; - out[3].se = tmp_out[3]; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + for (u32 i = 0; i < 4; i++) out[i].sc = tmp_out[i]; - //s15 - 
tmp_w[0] = w[0].sf; - tmp_w[1] = w[1].sf; - tmp_w[2] = w[2].sf; - tmp_w[3] = w[3].sf; + //sd - tmp_hash[0] = hash[0].sf; - tmp_hash[1] = hash[1].sf; - tmp_hash[2] = hash[2].sf; - tmp_hash[3] = hash[3].sf; + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sd; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sd; - aes256_scrt_format(aes_ks, tmp_w, pw_len, tmp_hash, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); - out[0].sf = tmp_out[0]; - out[1].sf = tmp_out[1]; - out[2].sf = tmp_out[2]; - out[3].sf = tmp_out[3]; + for (u32 i = 0; i < 4; i++) out[i].sd = tmp_out[i]; + //se -#endif + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].se; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].se; + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + + for (u32 i = 0; i < 4; i++) out[i].se = tmp_out[i]; + + //sf + + for (u32 i = 0; i < 64; i++) tmp_w[i] = w[i].sf; + for (u32 i = 0; i < 8; i++) tmp_h[i] = h[i].sf; + + aes256_scrt_format (aes_ks, tmp_w, pw_len, tmp_h, tmp_out, s_te0, s_te1, s_te2, s_te3, s_te4); + + for (u32 i = 0; i < 4; i++) out[i].sf = tmp_out[i]; + + #endif } -KERNEL_FQ void m31400_mxx(KERN_ATTR_VECTOR()) +KERNEL_FQ void m31400_mxx (KERN_ATTR_VECTOR()) { - /** - * modifier - */ + /** + * modifier + */ - const u64 lid = get_local_id(0); - const u64 gid = get_global_id(0); - const u64 lsz = get_local_size(0); + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); - /** - * aes shared - */ + /** + * aes shared + */ -#ifdef REAL_SHM + #ifdef REAL_SHM - LOCAL_VK u32 s_te0[256]; - LOCAL_VK u32 s_te1[256]; - LOCAL_VK u32 s_te2[256]; - LOCAL_VK u32 s_te3[256]; - LOCAL_VK u32 s_te4[256]; + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) - { - s_te0[i] = te0[i]; - s_te1[i] = te1[i]; - s_te2[i] = te2[i]; - s_te3[i] = te3[i]; - s_te4[i] = te4[i]; - } + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } - SYNC_THREADS(); + SYNC_THREADS(); -#else + #else - CONSTANT_AS u32a *s_te0 = te0; - CONSTANT_AS u32a *s_te1 = te1; - CONSTANT_AS u32a *s_te2 = te2; - CONSTANT_AS u32a *s_te3 = te3; - CONSTANT_AS u32a *s_te4 = te4; + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; -#endif + #endif - if (gid >= GID_CNT) - return; + if (gid >= GID_CNT) return; - /** - * base - */ + /** + * base + */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len; - u32x w[64] = {0}; + u32x w[64] = {0}; - for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) - { - w[idx] = pws[gid].i[idx]; - } + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } - u32 aes_ks[60]; + /** + * loop + */ - /** - * loop - */ + u32x w0l = w[0]; - u32x w0l = w[0]; + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; - for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) - { - const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + const u32x w0 = w0l | w0r; - const u32x w0 = w0l | w0r; + w[0] = w0; - w[0] = w0; + sha256_ctx_vector_t ctx; - sha256_ctx_vector_t ctx; + sha256_init_vector (&ctx); - 
sha256_init_vector(&ctx); + sha256_update_vector_swap (&ctx, w, pw_len); - sha256_update_vector_swap(&ctx, w, pw_len); + sha256_final_vector (&ctx); - sha256_final_vector(&ctx); + u32x out[4] = {0}; - u32x out[4] = {0}; + u32 aes_ks[60]; - aes256_scrt_format_VV(aes_ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4); + aes256_scrt_format_VV (aes_ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4); - const u32x r0 = out[DGST_R0]; - const u32x r1 = out[DGST_R1]; - const u32x r2 = out[DGST_R2]; - const u32x r3 = out[DGST_R3]; + const u32x r0 = out[DGST_R0]; + const u32x r1 = out[DGST_R1]; + const u32x r2 = out[DGST_R2]; + const u32x r3 = out[DGST_R3]; - COMPARE_M_SIMD(r0, r1, r2, r3); - } + COMPARE_M_SIMD (r0, r1, r2, r3); + } } -KERNEL_FQ void m31400_sxx(KERN_ATTR_VECTOR()) +KERNEL_FQ void m31400_sxx (KERN_ATTR_VECTOR()) { - /** - * modifier - */ + /** + * modifier + */ - const u64 lid = get_local_id(0); - const u64 gid = get_global_id(0); - const u64 lsz = get_local_size(0); + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); - /** - * aes shared - */ + /** + * aes shared + */ -#ifdef REAL_SHM + #ifdef REAL_SHM - LOCAL_VK u32 s_te0[256]; - LOCAL_VK u32 s_te1[256]; - LOCAL_VK u32 s_te2[256]; - LOCAL_VK u32 s_te3[256]; - LOCAL_VK u32 s_te4[256]; + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; - for (u32 i = lid; i < 256; i += lsz) - { - s_te0[i] = te0[i]; - s_te1[i] = te1[i]; - s_te2[i] = te2[i]; - s_te3[i] = te3[i]; - s_te4[i] = te4[i]; - } + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } - SYNC_THREADS(); + SYNC_THREADS(); -#else + #else - CONSTANT_AS u32a *s_te0 = te0; - CONSTANT_AS u32a *s_te1 = te1; - CONSTANT_AS u32a *s_te2 = te2; - CONSTANT_AS u32a *s_te3 = te3; - CONSTANT_AS u32a *s_te4 = te4; + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; -#endif + #endif - if (gid >= GID_CNT) - return; - /** - * digest - */ + if (gid >= GID_CNT) return; - const u32 search[4] = - { - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], - digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]}; + /** + * digest + */ - /** - * base - */ + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; - const u32 pw_len = pws[gid].pw_len; + /** + * base + */ - u32x w[64] = {0}; + const u32 pw_len = pws[gid].pw_len; - for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) - { - w[idx] = pws[gid].i[idx]; - } + u32x w[64] = {0}; - /** - * loop - */ + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } - u32 aes_ks[60]; - u32x w0l = w[0]; + /** + * loop + */ - for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) - { - const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + u32x w0l = w[0]; - const u32x w0 = w0l | w0r; + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; - w[0] = w0; + const u32x w0 = w0l | w0r; - 
sha256_ctx_vector_t ctx;
 
- sha256_init_vector(&ctx);
+ w[0] = w0;
 
- sha256_update_vector_swap(&ctx, w, pw_len);
+ sha256_ctx_vector_t ctx;
 
- sha256_final_vector(&ctx);
+ sha256_init_vector (&ctx);
 
- u32x out[4] = {0};
+ sha256_update_vector_swap (&ctx, w, pw_len);
 
+ sha256_final_vector (&ctx);
+
+ u32x out[4] = {0};
+
+ u32 aes_ks[60];
 
- aes256_scrt_format_VV(aes_ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
+ aes256_scrt_format_VV (aes_ks, w, pw_len, ctx.h, out, s_te0, s_te1, s_te2, s_te3, s_te4);
 
- const u32x r0 = out[DGST_R0];
- const u32x r1 = out[DGST_R1];
- const u32x r2 = out[DGST_R2];
- const u32x r3 = out[DGST_R3];
+ const u32x r0 = out[DGST_R0];
+ const u32x r1 = out[DGST_R1];
+ const u32x r2 = out[DGST_R2];
+ const u32x r3 = out[DGST_R3];
 
- COMPARE_S_SIMD(r0, r1, r2, r3);
- }
+ COMPARE_S_SIMD (r0, r1, r2, r3);
+ }
 }
diff --git a/docs/changes.txt b/docs/changes.txt
index e1ec759d7..bfe6b77cc 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -20,6 +20,7 @@
 - Added hash-mode: GPG (AES-128/AES-256 (SHA-256($pass)))
 - Added hash-mode: GPG (AES-128/AES-256 (SHA-512($pass)))
 - Added hash-mode: MetaMask Wallet (short hash, plaintext check)
+- Added hash-mode: SecureCRT MasterPassphrase v2
 - Added hash-mode: Veeam VB
 - Added hash-mode: bcrypt(sha256($pass))
 - Added hash-mode: HMAC-RIPEMD160 (key = $pass)
diff --git a/src/modules/module_31400.c b/src/modules/module_31400.c
index 93ee01c7a..9086a1fd4 100644
--- a/src/modules/module_31400.c
+++ b/src/modules/module_31400.c
@@ -18,7 +18,7 @@
 static const u32 DGST_POS0 = 0;
 static const u32 DGST_POS1 = 1;
 static const u32 DGST_POS2 = 2;
 static const u32 DGST_POS3 = 3;
-static const u32 DGST_SIZE = DGST_SIZE_4_8;
+static const u32 DGST_SIZE = DGST_SIZE_4_4;
 static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH;
 static const char *HASH_NAME = "SecureCRT MasterPassphrase v2";
 static const u64 KERN_TYPE = 31400;
@@ -53,7 +53,7 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig,
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
- static const char *CONFIGPASSPHRASEV2_SIGNATURE = "S:\"Config Passphrase\"=02:"; //The whole line is part of the format to prevent confusion with other similiar tokens also prefixed with 02: in the config files
+ static const char *CONFIGPASSPHRASEV2_SIGNATURE = "S:\"Config Passphrase\"=02:"; //The whole line is part of the format to prevent confusion with other similar tokens also prefixed with 02: in the config files
 static const char *CONFIGPASSPHRASEV2_SIGNATURE_UNDERSCORE = "S:_Config_Passphrase_=02:"; //double quotes char messes up testing so we're also allowing underscore instead
 
 u32 *digest = (u32 *) digest_buf;
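
For reference, the per-candidate transform these kernels compute is: key = SHA-256(password); plaintext = LE32(pw_len) || password || leading bytes of SHA-256(password), packed into a single 16-byte block; stored digest = that block encrypted with one AES-256-ECB operation. The following is a minimal host-side sketch under those assumptions, derived from aes256_scrt_format above; it uses OpenSSL, and the helper name check_scrt_v2 is illustrative, not hashcat or SecureCRT API.

#include <stdint.h>
#include <string.h>
#include <openssl/aes.h>
#include <openssl/sha.h>

/* Mirrors aes256_scrt_format on the host; assumes pw_len <= 12 so the
   password and the digest filler fit within the single 16-byte block. */
static int check_scrt_v2 (const uint8_t *pw, uint32_t pw_len, const uint8_t expected[16])
{
  uint8_t digest[SHA256_DIGEST_LENGTH];

  SHA256 (pw, pw_len, digest); /* doubles as AES key and block filler */

  uint8_t block[16];

  /* block = LE32(pw_len) || password || leading digest bytes */
  block[0] = (uint8_t) (pw_len >>  0);
  block[1] = (uint8_t) (pw_len >>  8);
  block[2] = (uint8_t) (pw_len >> 16);
  block[3] = (uint8_t) (pw_len >> 24);

  memcpy (block + 4, pw, pw_len);
  memcpy (block + 4 + pw_len, digest, 12 - pw_len);

  AES_KEY ks;

  AES_set_encrypt_key (digest, 256, &ks);

  uint8_t out[16];

  AES_encrypt (block, out, &ks); /* single ECB block, no IV */

  return memcmp (out, expected, 16) == 0;
}

This also motivates the DGST_SIZE_4_8 to DGST_SIZE_4_4 change in module_31400.c: the comparison covers exactly one 16-byte AES block, i.e. four u32 registers.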