From af622f6df5e421b6695cc401ec4c407027698278 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Fri, 26 Jul 2019 23:47:32 +0200 Subject: [PATCH 1/5] Added hash-mode: BitShares v0.x - sha512(sha512(pass)) --- OpenCL/m01770_a0-optimized.cl | 436 +++++++++++++++++++++++ OpenCL/m01770_a0-pure.cl | 171 +++++++++ OpenCL/m01770_a1-optimized.cl | 605 +++++++++++++++++++++++++++++++ OpenCL/m01770_a1-pure.cl | 165 +++++++++ OpenCL/m01770_a3-optimized.cl | 645 ++++++++++++++++++++++++++++++++++ OpenCL/m01770_a3-pure.cl | 191 ++++++++++ docs/changes.txt | 1 + docs/credits.txt | 1 + docs/readme.txt | 1 + src/modules/module_01770.c | 230 ++++++++++++ tools/test_modules/m01770.pm | 43 +++ 11 files changed, 2489 insertions(+) create mode 100644 OpenCL/m01770_a0-optimized.cl create mode 100644 OpenCL/m01770_a0-pure.cl create mode 100644 OpenCL/m01770_a1-optimized.cl create mode 100644 OpenCL/m01770_a1-pure.cl create mode 100644 OpenCL/m01770_a3-optimized.cl create mode 100644 OpenCL/m01770_a3-pure.cl create mode 100644 src/modules/module_01770.c create mode 100644 tools/test_modules/m01770.pm diff --git a/OpenCL/m01770_a0-optimized.cl b/OpenCL/m01770_a0-optimized.cl new file mode 100644 index 000000000..ebca7fe35 --- /dev/null +++ b/OpenCL/m01770_a0-optimized.cl @@ -0,0 +1,436 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u32x wend = 0x80000000; + u32x wlen = 64*8; + + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = hl32_to_64 (w3[2], w3[3]); + u64x w8_t = hl32_to_64 (wend, 0); + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (0, wlen); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; +} + +KERNEL_FQ void m01770_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + u32x w4_t[4]; + u32x w5_t[4]; + u32x w6_t[4]; + u32x w7_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = 0; + w4_t[0] = 0; + w4_t[1] = 0; + w4_t[2] = 0; + w4_t[3] = 0; + w5_t[0] = 0; + w5_t[1] = 0; + w5_t[2] = 0; + w5_t[3] = 0; + w6_t[0] = 0; + w6_t[1] = 0; + w6_t[2] = 0; + w6_t[3] = 0; + w7_t[0] = 0; + w7_t[1] = 0; + w7_t[2] = 0; + w7_t[3] = out_len * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_vector (w0_t, w1_t, w2_t, w3_t, w4_t, w5_t, w6_t, w7_t, digest); + + w0_t[0] = h32_from_64 (digest[0]); + w0_t[1] = l32_from_64 (digest[0]); + w0_t[2] = h32_from_64 (digest[1]); + w0_t[3] = l32_from_64 (digest[1]); + w1_t[0] = h32_from_64 (digest[2]); + w1_t[1] = l32_from_64 (digest[2]); + w1_t[2] = h32_from_64 (digest[3]); + w1_t[3] = l32_from_64 (digest[3]); + w2_t[0] = h32_from_64 (digest[4]); + w2_t[1] = l32_from_64 (digest[4]); + w2_t[2] = h32_from_64 (digest[5]); + w2_t[3] = l32_from_64 (digest[5]); + w3_t[0] = h32_from_64 (digest[6]); + w3_t[1] = l32_from_64 (digest[6]); + w3_t[2] = h32_from_64 (digest[7]); + w3_t[3] = l32_from_64 (digest[7]); + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01770_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01770_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + u32x w4_t[4]; + u32x w5_t[4]; + u32x w6_t[4]; + u32x w7_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = 0; + w4_t[0] = 0; + w4_t[1] = 0; + w4_t[2] = 0; + w4_t[3] = 0; + w5_t[0] = 0; + w5_t[1] = 0; + w5_t[2] = 0; + w5_t[3] = 0; + w6_t[0] = 0; + w6_t[1] = 0; + w6_t[2] = 0; + w6_t[3] = 0; + w7_t[0] = 0; + w7_t[1] = 0; + w7_t[2] = 0; + w7_t[3] = out_len * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_vector (w0_t, w1_t, w2_t, w3_t, w4_t, w5_t, w6_t, w7_t, digest); + + w0_t[0] = h32_from_64 (digest[0]); + w0_t[1] = l32_from_64 (digest[0]); + w0_t[2] = h32_from_64 (digest[1]); + w0_t[3] = l32_from_64 (digest[1]); + w1_t[0] = h32_from_64 (digest[2]); + w1_t[1] = l32_from_64 (digest[2]); + w1_t[2] = h32_from_64 (digest[3]); + w1_t[3] = l32_from_64 (digest[3]); + w2_t[0] = h32_from_64 (digest[4]); + w2_t[1] = l32_from_64 (digest[4]); + w2_t[2] = h32_from_64 (digest[5]); + w2_t[3] = l32_from_64 (digest[5]); + w3_t[0] = h32_from_64 (digest[6]); + w3_t[1] = l32_from_64 (digest[6]); + w3_t[2] = h32_from_64 (digest[7]); + w3_t[3] = l32_from_64 (digest[7]); + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01770_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m01770_a0-pure.cl b/OpenCL/m01770_a0-pure.cl new file mode 100644 index 000000000..b69f9528c --- /dev/null +++ b/OpenCL/m01770_a0-pure.cl @@ -0,0 +1,171 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#endif + +KERNEL_FQ void m01770_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + u32 final[32] = { 0 }; + + final[ 0] = h32_from_64_S (ctx0.h[0]); + final[ 1] = l32_from_64_S (ctx0.h[0]); + final[ 2] = h32_from_64_S (ctx0.h[1]); + final[ 3] = l32_from_64_S (ctx0.h[1]); + final[ 4] = h32_from_64_S (ctx0.h[2]); + final[ 5] = l32_from_64_S (ctx0.h[2]); + final[ 6] = h32_from_64_S (ctx0.h[3]); + final[ 7] = l32_from_64_S (ctx0.h[3]); + final[ 8] = h32_from_64_S (ctx0.h[4]); + final[ 9] = l32_from_64_S (ctx0.h[4]); + final[10] = h32_from_64_S (ctx0.h[5]); + final[11] = l32_from_64_S (ctx0.h[5]); + final[12] = h32_from_64_S (ctx0.h[6]); + final[13] = l32_from_64_S (ctx0.h[6]); + final[14] = h32_from_64_S (ctx0.h[7]); + final[15] = l32_from_64_S (ctx0.h[7]); + + sha512_update (&ctx, final, 64); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + u32 final[32] = { 0 }; + + final[ 0] = h32_from_64_S (ctx0.h[0]); + final[ 1] = l32_from_64_S (ctx0.h[0]); + final[ 2] = h32_from_64_S (ctx0.h[1]); + final[ 3] = l32_from_64_S (ctx0.h[1]); + final[ 4] = h32_from_64_S (ctx0.h[2]); + final[ 5] = l32_from_64_S (ctx0.h[2]); + final[ 6] = h32_from_64_S (ctx0.h[3]); + final[ 7] = l32_from_64_S (ctx0.h[3]); + final[ 8] = h32_from_64_S (ctx0.h[4]); + final[ 9] = l32_from_64_S (ctx0.h[4]); + final[10] = h32_from_64_S (ctx0.h[5]); + final[11] = l32_from_64_S (ctx0.h[5]); + final[12] = h32_from_64_S (ctx0.h[6]); + final[13] = l32_from_64_S (ctx0.h[6]); + final[14] = h32_from_64_S (ctx0.h[7]); + final[15] = l32_from_64_S (ctx0.h[7]); + + sha512_update (&ctx, final, 64); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01770_a1-optimized.cl b/OpenCL/m01770_a1-optimized.cl new file mode 100644 index 000000000..346be147f --- /dev/null +++ b/OpenCL/m01770_a1-optimized.cl @@ -0,0 +1,605 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; +} + +DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u32x wend = 0x80000000; + u32x wlen = 64*8; + + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = hl32_to_64 (w3[2], w3[3]); + u64x w8_t = hl32_to_64 (wend, 0);; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (0, wlen); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; +} + +KERNEL_FQ void m01770_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_len * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest); + + w0_t[0] = h32_from_64 (digest[0]); + w0_t[1] = l32_from_64 (digest[0]); + w0_t[2] = h32_from_64 (digest[1]); + w0_t[3] = l32_from_64 (digest[1]); + w1_t[0] = h32_from_64 (digest[2]); + w1_t[1] = l32_from_64 (digest[2]); + w1_t[2] = h32_from_64 (digest[3]); + w1_t[3] = l32_from_64 (digest[3]); + w2_t[0] = h32_from_64 (digest[4]); + w2_t[1] = l32_from_64 (digest[4]); + w2_t[2] = h32_from_64 (digest[5]); + w2_t[3] = l32_from_64 (digest[5]); + w3_t[0] = h32_from_64 (digest[6]); + w3_t[1] = l32_from_64 (digest[6]); + w3_t[2] = h32_from_64 (digest[7]); + w3_t[3] = l32_from_64 (digest[7]); + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01770_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01770_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_len * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest); + + w0_t[0] = h32_from_64 (digest[0]); + w0_t[1] = l32_from_64 (digest[0]); + w0_t[2] = h32_from_64 (digest[1]); + w0_t[3] = l32_from_64 (digest[1]); + w1_t[0] = h32_from_64 (digest[2]); + w1_t[1] = l32_from_64 (digest[2]); + w1_t[2] = h32_from_64 (digest[3]); + w1_t[3] = l32_from_64 (digest[3]); + w2_t[0] = h32_from_64 (digest[4]); + w2_t[1] = l32_from_64 (digest[4]); + w2_t[2] = h32_from_64 (digest[5]); + w2_t[3] = l32_from_64 (digest[5]); + w3_t[0] = h32_from_64 (digest[6]); + w3_t[1] = l32_from_64 (digest[6]); + w3_t[2] = h32_from_64 (digest[7]); + w3_t[3] = l32_from_64 (digest[7]); + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01770_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m01770_a1-pure.cl b/OpenCL/m01770_a1-pure.cl new file mode 100644 index 000000000..3c57255d6 --- /dev/null +++ b/OpenCL/m01770_a1-pure.cl @@ -0,0 +1,165 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#endif + +KERNEL_FQ void m01770_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha512_ctx_t ctx00; + + sha512_init (&ctx00); + + sha512_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx0 = ctx00; + + sha512_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx0); + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + u32 final[32] = { 0 }; + + final[ 0] = h32_from_64_S (ctx0.h[0]); + final[ 1] = l32_from_64_S (ctx0.h[0]); + final[ 2] = h32_from_64_S (ctx0.h[1]); + final[ 3] = l32_from_64_S (ctx0.h[1]); + final[ 4] = h32_from_64_S (ctx0.h[2]); + final[ 5] = l32_from_64_S (ctx0.h[2]); + final[ 6] = h32_from_64_S (ctx0.h[3]); + final[ 7] = l32_from_64_S (ctx0.h[3]); + final[ 8] = h32_from_64_S (ctx0.h[4]); + final[ 9] = l32_from_64_S (ctx0.h[4]); + final[10] = h32_from_64_S (ctx0.h[5]); + final[11] = l32_from_64_S (ctx0.h[5]); + final[12] = h32_from_64_S (ctx0.h[6]); + final[13] = l32_from_64_S (ctx0.h[6]); + final[14] = h32_from_64_S (ctx0.h[7]); + final[15] = l32_from_64_S (ctx0.h[7]); + + sha512_update (&ctx, final, 64); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha512_ctx_t ctx00; + + sha512_init (&ctx00); + + sha512_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx0 = ctx00; + + sha512_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx0); + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + u32 final[32] = { 0 }; + + final[ 0] = h32_from_64_S (ctx0.h[0]); + final[ 1] = l32_from_64_S (ctx0.h[0]); + final[ 2] = h32_from_64_S (ctx0.h[1]); + final[ 3] = l32_from_64_S (ctx0.h[1]); + final[ 4] = h32_from_64_S (ctx0.h[2]); + final[ 5] = l32_from_64_S (ctx0.h[2]); + final[ 6] = h32_from_64_S (ctx0.h[3]); + final[ 7] = l32_from_64_S (ctx0.h[3]); + final[ 8] = h32_from_64_S (ctx0.h[4]); + final[ 9] = l32_from_64_S (ctx0.h[4]); + final[10] = h32_from_64_S (ctx0.h[5]); + final[11] = l32_from_64_S (ctx0.h[5]); + final[12] = h32_from_64_S (ctx0.h[6]); + final[13] = l32_from_64_S (ctx0.h[6]); + final[14] = h32_from_64_S (ctx0.h[7]); + final[15] = l32_from_64_S (ctx0.h[7]); + + sha512_update (&ctx, final, 64); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01770_a3-optimized.cl b/OpenCL/m01770_a3-optimized.cl new file mode 100644 index 000000000..541110eab --- /dev/null +++ b/OpenCL/m01770_a3-optimized.cl @@ -0,0 +1,645 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; +} + +DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u32x wend = 0x80000000; + u32x wlen = 64*8; + + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = hl32_to_64 (w3[2], w3[3]); + u64x w8_t = hl32_to_64 (wend, 0);; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (0, wlen); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; +} + +DECLSPEC void m01770m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest); + + w0_t[0] = h32_from_64 (digest[0]); + w0_t[1] = l32_from_64 (digest[0]); + w0_t[2] = h32_from_64 (digest[1]); + w0_t[3] = l32_from_64 (digest[1]); + w1_t[0] = h32_from_64 (digest[2]); + w1_t[1] = l32_from_64 (digest[2]); + w1_t[2] = h32_from_64 (digest[3]); + w1_t[3] = l32_from_64 (digest[3]); + w2_t[0] = h32_from_64 (digest[4]); + w2_t[1] = l32_from_64 (digest[4]); + w2_t[2] = h32_from_64 (digest[5]); + w2_t[3] = l32_from_64 (digest[5]); + w3_t[0] = h32_from_64 (digest[6]); + w3_t[1] = l32_from_64 (digest[6]); + w3_t[2] = h32_from_64 (digest[7]); + w3_t[3] = l32_from_64 (digest[7]); + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m01770s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_full (w0_t, w1_t, w2_t, w3_t, digest); + + w0_t[0] = h32_from_64 (digest[0]); + w0_t[1] = l32_from_64 (digest[0]); + w0_t[2] = h32_from_64 (digest[1]); + w0_t[3] = l32_from_64 (digest[1]); + w1_t[0] = h32_from_64 (digest[2]); + w1_t[1] = l32_from_64 (digest[2]); + w1_t[2] = h32_from_64 (digest[3]); + w1_t[3] = l32_from_64 (digest[3]); + w2_t[0] = h32_from_64 (digest[4]); + w2_t[1] = l32_from_64 (digest[4]); + w2_t[2] = h32_from_64 (digest[5]); + w2_t[3] = l32_from_64 (digest[5]); + w3_t[0] = h32_from_64 (digest[6]); + w3_t[1] = l32_from_64 (digest[6]); + w3_t[2] = h32_from_64 (digest[7]); + w3_t[3] = l32_from_64 (digest[7]); + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_opt (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); +} + +KERNEL_FQ void m01770_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); +} + +KERNEL_FQ void m01770_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); +} + +KERNEL_FQ void m01770_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); +} + +KERNEL_FQ void m01770_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); +} + +KERNEL_FQ void m01770_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); +} diff --git a/OpenCL/m01770_a3-pure.cl b/OpenCL/m01770_a3-pure.cl new file mode 100644 index 000000000..3a4a27c14 --- /dev/null +++ b/OpenCL/m01770_a3-pure.cl @@ -0,0 +1,191 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +KERNEL_FQ void m01770_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_vector_t ctx0; + + sha512_init_vector (&ctx0); + + sha512_update_vector (&ctx0, w, pw_len); + + sha512_final_vector (&ctx0); + + sha512_ctx_t ctx; + + sha512_init_vector (&ctx); + + u32x final[32] = { 0 }; + + final[ 0] = h32_from_64_S (ctx0.h[0]); + final[ 1] = l32_from_64_S (ctx0.h[0]); + final[ 2] = h32_from_64_S (ctx0.h[1]); + final[ 3] = l32_from_64_S (ctx0.h[1]); + final[ 4] = h32_from_64_S (ctx0.h[2]); + final[ 5] = l32_from_64_S (ctx0.h[2]); + final[ 6] = h32_from_64_S (ctx0.h[3]); + final[ 7] = l32_from_64_S (ctx0.h[3]); + final[ 8] = h32_from_64_S (ctx0.h[4]); + final[ 9] = l32_from_64_S (ctx0.h[4]); + final[10] = h32_from_64_S (ctx0.h[5]); + final[11] = l32_from_64_S (ctx0.h[5]); + final[12] = h32_from_64_S (ctx0.h[6]); + final[13] = l32_from_64_S (ctx0.h[6]); + final[14] = h32_from_64_S (ctx0.h[7]); + final[15] = l32_from_64_S (ctx0.h[7]); + + sha512_update_vector (&ctx, final, 64); + + sha512_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[7]); + const u32x r1 = h32_from_64 (ctx.h[7]); + const u32x r2 = l32_from_64 (ctx.h[3]); + const u32x r3 = h32_from_64 (ctx.h[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_vector_t ctx0; + + sha512_init_vector (&ctx0); + + sha512_update_vector (&ctx0, w, pw_len); + + sha512_final_vector (&ctx0); + + sha512_ctx_t ctx; + + sha512_init_vector (&ctx); + + u32x final[32] = { 0 }; + + final[ 0] = h32_from_64_S (ctx0.h[0]); + final[ 1] = l32_from_64_S (ctx0.h[0]); + final[ 2] = h32_from_64_S (ctx0.h[1]); + final[ 3] = l32_from_64_S (ctx0.h[1]); + final[ 4] = h32_from_64_S (ctx0.h[2]); + final[ 5] = l32_from_64_S (ctx0.h[2]); + final[ 6] = h32_from_64_S (ctx0.h[3]); + final[ 7] = l32_from_64_S (ctx0.h[3]); + final[ 8] = h32_from_64_S (ctx0.h[4]); + final[ 9] = l32_from_64_S (ctx0.h[4]); + final[10] = h32_from_64_S (ctx0.h[5]); + final[11] = l32_from_64_S (ctx0.h[5]); + final[12] = h32_from_64_S (ctx0.h[6]); + final[13] = l32_from_64_S (ctx0.h[6]); + final[14] = h32_from_64_S (ctx0.h[7]); + final[15] = l32_from_64_S (ctx0.h[7]); + + sha512_update_vector (&ctx, final, 64); + + sha512_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[7]); + const u32x r1 = h32_from_64 (ctx.h[7]); + const u32x r2 = l32_from_64 (ctx.h[3]); + const u32x r3 = h32_from_64 (ctx.h[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/docs/changes.txt b/docs/changes.txt index 0bb228187..809c29c3b 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -35,6 +35,7 @@ - Added hash-mode: Python passlib pbkdf2-sha256 - Added hash-mode: Python passlib pbkdf2-sha512 - Added hash-mode: Oracle Transportation Management (SHA256) +- Added hash-mode: BitShares v0.x - sha512(sha512(pass)) ## ## Bugs diff --git a/docs/credits.txt b/docs/credits.txt index bd21d35d7..a70e4506e 100644 --- a/docs/credits.txt +++ b/docs/credits.txt @@ -18,6 +18,7 @@ Philipp "philsmd" Schmidt (@philsmd) Gabriele "matrix" Gristina (@gm4tr1x) +* BitShares v0.x - sha512(sha512(pass)) kernel module * gzip wordlists feature * SHA-224 kernel module + optimizations * Some AES OpenCL kernel module optimizations diff --git a/docs/readme.txt b/docs/readme.txt index 9e36b73ce..6ab191951 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -92,6 +92,7 @@ NVIDIA GPUs require "NVIDIA Driver" (418.56 or later) and "CUDA Toolkit" (10.1 o - sha512($salt.$pass) - sha512(utf16le($pass).$salt) - sha512($salt.utf16le($pass)) +- BitShares v0.x - sha512(sha512(pass)) - HMAC-MD5 (key = $pass) - HMAC-MD5 (key = $salt) - HMAC-SHA1 (key = $pass) diff --git a/src/modules/module_01770.c b/src/modules/module_01770.c new file mode 100644 index 000000000..a12c4b53f --- /dev/null +++ b/src/modules/module_01770.c @@ -0,0 +1,230 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL; +static const u32 DGST_POS0 = 14; +static const u32 DGST_POS1 = 15; +static const u32 DGST_POS2 = 6; +static const u32 DGST_POS3 = 7; +static const u32 DGST_SIZE = DGST_SIZE_8_8; +static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH; +static const char *HASH_NAME = "BitShares v0.x - sha512(sha512(pass))"; +static const u64 KERN_TYPE = 1770; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PRECOMPUTE_INIT + | OPTI_TYPE_EARLY_SKIP + | OPTI_TYPE_NOT_ITERATED + | OPTI_TYPE_NOT_SALTED + | OPTI_TYPE_USES_BITS_64 + | OPTI_TYPE_RAW_HASH; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_BE + | OPTS_TYPE_PT_ADD80 + | OPTS_TYPE_PT_ADDBITS15; +static const u32 SALT_TYPE = SALT_TYPE_NONE; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "caec04bdf7c17f763a9ec7439f7c9abda112f1bfc9b1bb684fef9b6142636979b9896cfc236896d821a69a961a143dd19c96d59777258201f1bbe5ecc2a2ecf5"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u64 *digest = (u64 *) digest_buf; + + token_t token; + + token.token_cnt = 1; + + token.len_min[0] = 128; + token.len_max[0] = 128; + token.attr[0] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + const u8 *hash_pos = token.buf[0]; + + digest[0] = hex_to_u64 (hash_pos + 0); + digest[1] = hex_to_u64 (hash_pos + 16); + digest[2] = hex_to_u64 (hash_pos + 32); + digest[3] = hex_to_u64 (hash_pos + 48); + digest[4] = hex_to_u64 (hash_pos + 64); + digest[5] = hex_to_u64 (hash_pos + 80); + digest[6] = hex_to_u64 (hash_pos + 96); + digest[7] = hex_to_u64 (hash_pos + 112); + + digest[0] = byte_swap_64 (digest[0]); + digest[1] = byte_swap_64 (digest[1]); + digest[2] = byte_swap_64 (digest[2]); + digest[3] = byte_swap_64 (digest[3]); + digest[4] = byte_swap_64 (digest[4]); + digest[5] = byte_swap_64 (digest[5]); + digest[6] = byte_swap_64 (digest[6]); + digest[7] = byte_swap_64 (digest[7]); + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + digest[0] -= SHA512M_A; + digest[1] -= SHA512M_B; + digest[2] -= SHA512M_C; + digest[3] -= SHA512M_D; + digest[4] -= SHA512M_E; + digest[5] -= SHA512M_F; + digest[6] -= SHA512M_G; + digest[7] -= SHA512M_H; + } + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u64 *digest = (const u64 *) digest_buf; + + // we can not change anything in the original buffer, otherwise destroying sorting + // therefore create some local buffer + + u64 tmp[8]; + + tmp[0] = digest[0]; + tmp[1] = digest[1]; + tmp[2] = digest[2]; + tmp[3] = digest[3]; + tmp[4] = digest[4]; + tmp[5] = digest[5]; + tmp[6] = digest[6]; + tmp[7] = digest[7]; + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + tmp[0] += SHA512M_A; + tmp[1] += SHA512M_B; + tmp[2] += SHA512M_C; + tmp[3] += SHA512M_D; + tmp[4] += SHA512M_E; + tmp[5] += SHA512M_F; + tmp[6] += SHA512M_G; + tmp[7] += SHA512M_H; + } + + tmp[0] = byte_swap_64 (tmp[0]); + tmp[1] = byte_swap_64 (tmp[1]); + tmp[2] = byte_swap_64 (tmp[2]); + tmp[3] = byte_swap_64 (tmp[3]); + tmp[4] = byte_swap_64 (tmp[4]); + tmp[5] = byte_swap_64 (tmp[5]); + tmp[6] = byte_swap_64 (tmp[6]); + tmp[7] = byte_swap_64 (tmp[7]); + + + u8 *out_buf = (u8 *) line_buf; + + u64_to_hex (tmp[0], out_buf + 0); + u64_to_hex (tmp[1], out_buf + 16); + u64_to_hex (tmp[2], out_buf + 32); + u64_to_hex (tmp[3], out_buf + 48); + u64_to_hex (tmp[4], out_buf + 64); + u64_to_hex (tmp[5], out_buf + 80); + u64_to_hex (tmp[6], out_buf + 96); + u64_to_hex (tmp[7], out_buf + 112); + + const int out_len = 128; + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = MODULE_DEFAULT; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = MODULE_DEFAULT; + module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = MODULE_DEFAULT; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/tools/test_modules/m01770.pm b/tools/test_modules/m01770.pm new file mode 100644 index 000000000..f3d4d7778 --- /dev/null +++ b/tools/test_modules/m01770.pm @@ -0,0 +1,43 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +use strict; +use warnings; + +use Digest::SHA qw (sha512); +use Digest::SHA qw (sha512_hex); + +sub module_constraints { [[0, 256], [-1, -1], [0, 55], [-1, -1], [-1, -1]] } + +sub module_generate_hash +{ + my $word = shift; + + my $digest = sha512_hex (sha512 ($word)); + + my $hash = sprintf ("%s", $digest); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my ($hash, $word) = split (':', $line); + + return unless defined $hash; + return unless defined $word; + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed); + + return ($new_hash, $word); +} + +1; From 1a110b4f7119f0c87455ea3abe81f26baf450348 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Tue, 30 Jul 2019 14:12:42 +0200 Subject: [PATCH 2/5] cleanup credits.txt and changes.txt --- docs/changes.txt | 1 - docs/credits.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 809c29c3b..0bb228187 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -35,7 +35,6 @@ - Added hash-mode: Python passlib pbkdf2-sha256 - Added hash-mode: Python passlib pbkdf2-sha512 - Added hash-mode: Oracle Transportation Management (SHA256) -- Added hash-mode: BitShares v0.x - sha512(sha512(pass)) ## ## Bugs diff --git a/docs/credits.txt b/docs/credits.txt index a70e4506e..bd21d35d7 100644 --- a/docs/credits.txt +++ b/docs/credits.txt @@ -18,7 +18,6 @@ Philipp "philsmd" Schmidt (@philsmd) Gabriele "matrix" Gristina (@gm4tr1x) -* BitShares v0.x - sha512(sha512(pass)) kernel module * gzip wordlists feature * SHA-224 kernel module + optimizations * Some AES OpenCL kernel module optimizations From bba2ee65a2b7f20b5e2b31ec13710d2ae10e9ee2 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Tue, 30 Jul 2019 15:15:49 +0200 Subject: [PATCH 3/5] switch hash-mode from 1770 to 21000 --- ...a0-optimized.cl => m21000_a0-optimized.cl} | 12 ++++---- .../{m01770_a0-pure.cl => m21000_a0-pure.cl} | 4 +-- ...a1-optimized.cl => m21000_a1-optimized.cl} | 12 ++++---- .../{m01770_a1-pure.cl => m21000_a1-pure.cl} | 4 +-- ...a3-optimized.cl => m21000_a3-optimized.cl} | 28 +++++++++---------- .../{m01770_a3-pure.cl => m21000_a3-pure.cl} | 4 +-- docs/readme.txt | 1 - .../{module_01770.c => module_21000.c} | 2 +- tools/test_modules/{m01770.pm => m21000.pm} | 0 9 files changed, 33 insertions(+), 34 deletions(-) rename OpenCL/{m01770_a0-optimized.cl => m21000_a0-optimized.cl} (97%) rename OpenCL/{m01770_a0-pure.cl => m21000_a0-pure.cl} (97%) rename OpenCL/{m01770_a1-optimized.cl => m21000_a1-optimized.cl} (98%) rename OpenCL/{m01770_a1-pure.cl => m21000_a1-pure.cl} (97%) rename OpenCL/{m01770_a3-optimized.cl => m21000_a3-optimized.cl} (95%) rename OpenCL/{m01770_a3-pure.cl => m21000_a3-pure.cl} (97%) rename src/modules/{module_01770.c => module_21000.c} (99%) rename tools/test_modules/{m01770.pm => m21000.pm} (100%) diff --git a/OpenCL/m01770_a0-optimized.cl b/OpenCL/m21000_a0-optimized.cl similarity index 97% rename from OpenCL/m01770_a0-optimized.cl rename to OpenCL/m21000_a0-optimized.cl index ebca7fe35..c7cfa5b8d 100644 --- a/OpenCL/m01770_a0-optimized.cl +++ b/OpenCL/m21000_a0-optimized.cl @@ -125,7 +125,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * digest[7] = h; } -KERNEL_FQ void m01770_m04 (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_m04 (KERN_ATTR_RULES ()) { /** * modifier @@ -266,15 +266,15 @@ KERNEL_FQ void m01770_m04 (KERN_ATTR_RULES ()) } } -KERNEL_FQ void m01770_m08 (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_m08 (KERN_ATTR_RULES ()) { } -KERNEL_FQ void m01770_m16 (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_m16 (KERN_ATTR_RULES ()) { } -KERNEL_FQ void m01770_s04 (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_s04 (KERN_ATTR_RULES ()) { /** * modifier @@ -427,10 +427,10 @@ KERNEL_FQ void m01770_s04 (KERN_ATTR_RULES ()) } } -KERNEL_FQ void m01770_s08 (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_s08 (KERN_ATTR_RULES ()) { } -KERNEL_FQ void m01770_s16 (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_s16 (KERN_ATTR_RULES ()) { } diff --git a/OpenCL/m01770_a0-pure.cl b/OpenCL/m21000_a0-pure.cl similarity index 97% rename from OpenCL/m01770_a0-pure.cl rename to OpenCL/m21000_a0-pure.cl index b69f9528c..247fc3b3d 100644 --- a/OpenCL/m01770_a0-pure.cl +++ b/OpenCL/m21000_a0-pure.cl @@ -16,7 +16,7 @@ #include "inc_hash_sha512.cl" #endif -KERNEL_FQ void m01770_mxx (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_mxx (KERN_ATTR_RULES ()) { /** * modifier @@ -87,7 +87,7 @@ KERNEL_FQ void m01770_mxx (KERN_ATTR_RULES ()) } } -KERNEL_FQ void m01770_sxx (KERN_ATTR_RULES ()) +KERNEL_FQ void m21000_sxx (KERN_ATTR_RULES ()) { /** * modifier diff --git a/OpenCL/m01770_a1-optimized.cl b/OpenCL/m21000_a1-optimized.cl similarity index 98% rename from OpenCL/m01770_a1-optimized.cl rename to OpenCL/m21000_a1-optimized.cl index 346be147f..7ff4577f7 100644 --- a/OpenCL/m01770_a1-optimized.cl +++ b/OpenCL/m21000_a1-optimized.cl @@ -218,7 +218,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * digest[7] = h; } -KERNEL_FQ void m01770_m04 (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_m04 (KERN_ATTR_BASIC ()) { /** * modifier @@ -397,15 +397,15 @@ KERNEL_FQ void m01770_m04 (KERN_ATTR_BASIC ()) } } -KERNEL_FQ void m01770_m08 (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_m08 (KERN_ATTR_BASIC ()) { } -KERNEL_FQ void m01770_m16 (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_m16 (KERN_ATTR_BASIC ()) { } -KERNEL_FQ void m01770_s04 (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_s04 (KERN_ATTR_BASIC ()) { /** * modifier @@ -596,10 +596,10 @@ KERNEL_FQ void m01770_s04 (KERN_ATTR_BASIC ()) } } -KERNEL_FQ void m01770_s08 (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_s08 (KERN_ATTR_BASIC ()) { } -KERNEL_FQ void m01770_s16 (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_s16 (KERN_ATTR_BASIC ()) { } diff --git a/OpenCL/m01770_a1-pure.cl b/OpenCL/m21000_a1-pure.cl similarity index 97% rename from OpenCL/m01770_a1-pure.cl rename to OpenCL/m21000_a1-pure.cl index 3c57255d6..f5782bbd4 100644 --- a/OpenCL/m01770_a1-pure.cl +++ b/OpenCL/m21000_a1-pure.cl @@ -14,7 +14,7 @@ #include "inc_hash_sha512.cl" #endif -KERNEL_FQ void m01770_mxx (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_mxx (KERN_ATTR_BASIC ()) { /** * modifier @@ -83,7 +83,7 @@ KERNEL_FQ void m01770_mxx (KERN_ATTR_BASIC ()) } } -KERNEL_FQ void m01770_sxx (KERN_ATTR_BASIC ()) +KERNEL_FQ void m21000_sxx (KERN_ATTR_BASIC ()) { /** * modifier diff --git a/OpenCL/m01770_a3-optimized.cl b/OpenCL/m21000_a3-optimized.cl similarity index 95% rename from OpenCL/m01770_a3-optimized.cl rename to OpenCL/m21000_a3-optimized.cl index 541110eab..768ba9e02 100644 --- a/OpenCL/m01770_a3-optimized.cl +++ b/OpenCL/m21000_a3-optimized.cl @@ -218,7 +218,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * digest[7] = h; } -DECLSPEC void m01770m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +DECLSPEC void m21000m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) { /** * modifier @@ -311,7 +311,7 @@ DECLSPEC void m01770m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) } } -DECLSPEC void m01770s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +DECLSPEC void m21000s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) { /** * modifier @@ -416,7 +416,7 @@ DECLSPEC void m01770s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) } } -KERNEL_FQ void m01770_m04 (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_m04 (KERN_ATTR_VECTOR ()) { /** * base @@ -451,10 +451,10 @@ KERNEL_FQ void m01770_m04 (KERN_ATTR_VECTOR ()) * main */ - m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); } -KERNEL_FQ void m01770_m08 (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_m08 (KERN_ATTR_VECTOR ()) { /** * base @@ -489,10 +489,10 @@ KERNEL_FQ void m01770_m08 (KERN_ATTR_VECTOR ()) * main */ - m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); } -KERNEL_FQ void m01770_m16 (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_m16 (KERN_ATTR_VECTOR ()) { /** * base @@ -527,10 +527,10 @@ KERNEL_FQ void m01770_m16 (KERN_ATTR_VECTOR ()) * main */ - m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); } -KERNEL_FQ void m01770_s04 (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_s04 (KERN_ATTR_VECTOR ()) { /** * base @@ -565,10 +565,10 @@ KERNEL_FQ void m01770_s04 (KERN_ATTR_VECTOR ()) * main */ - m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); } -KERNEL_FQ void m01770_s08 (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_s08 (KERN_ATTR_VECTOR ()) { /** * base @@ -603,10 +603,10 @@ KERNEL_FQ void m01770_s08 (KERN_ATTR_VECTOR ()) * main */ - m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); } -KERNEL_FQ void m01770_s16 (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_s16 (KERN_ATTR_VECTOR ()) { /** * base @@ -641,5 +641,5 @@ KERNEL_FQ void m01770_s16 (KERN_ATTR_VECTOR ()) * main */ - m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); } diff --git a/OpenCL/m01770_a3-pure.cl b/OpenCL/m21000_a3-pure.cl similarity index 97% rename from OpenCL/m01770_a3-pure.cl rename to OpenCL/m21000_a3-pure.cl index 3a4a27c14..7ff68d9cb 100644 --- a/OpenCL/m01770_a3-pure.cl +++ b/OpenCL/m21000_a3-pure.cl @@ -14,7 +14,7 @@ #include "inc_hash_sha512.cl" #endif -KERNEL_FQ void m01770_mxx (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_mxx (KERN_ATTR_VECTOR ()) { /** * modifier @@ -96,7 +96,7 @@ KERNEL_FQ void m01770_mxx (KERN_ATTR_VECTOR ()) } } -KERNEL_FQ void m01770_sxx (KERN_ATTR_VECTOR ()) +KERNEL_FQ void m21000_sxx (KERN_ATTR_VECTOR ()) { /** * modifier diff --git a/docs/readme.txt b/docs/readme.txt index 6ab191951..9e36b73ce 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -92,7 +92,6 @@ NVIDIA GPUs require "NVIDIA Driver" (418.56 or later) and "CUDA Toolkit" (10.1 o - sha512($salt.$pass) - sha512(utf16le($pass).$salt) - sha512($salt.utf16le($pass)) -- BitShares v0.x - sha512(sha512(pass)) - HMAC-MD5 (key = $pass) - HMAC-MD5 (key = $salt) - HMAC-SHA1 (key = $pass) diff --git a/src/modules/module_01770.c b/src/modules/module_21000.c similarity index 99% rename from src/modules/module_01770.c rename to src/modules/module_21000.c index a12c4b53f..f3a631193 100644 --- a/src/modules/module_01770.c +++ b/src/modules/module_21000.c @@ -18,7 +18,7 @@ static const u32 DGST_POS3 = 7; static const u32 DGST_SIZE = DGST_SIZE_8_8; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH; static const char *HASH_NAME = "BitShares v0.x - sha512(sha512(pass))"; -static const u64 KERN_TYPE = 1770; +static const u64 KERN_TYPE = 21000; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_EARLY_SKIP diff --git a/tools/test_modules/m01770.pm b/tools/test_modules/m21000.pm similarity index 100% rename from tools/test_modules/m01770.pm rename to tools/test_modules/m21000.pm From d045c0f62a837bd741cf47d4ee6d6f0f72cfab72 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 31 Jul 2019 15:21:03 +0200 Subject: [PATCH 4/5] fix bug in 21000 a3 kernel --- OpenCL/m21000_a3-pure.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenCL/m21000_a3-pure.cl b/OpenCL/m21000_a3-pure.cl index 7ff68d9cb..87c56c54f 100644 --- a/OpenCL/m21000_a3-pure.cl +++ b/OpenCL/m21000_a3-pure.cl @@ -60,7 +60,7 @@ KERNEL_FQ void m21000_mxx (KERN_ATTR_VECTOR ()) sha512_final_vector (&ctx0); - sha512_ctx_t ctx; + sha512_ctx_vector_t ctx; sha512_init_vector (&ctx); @@ -154,7 +154,7 @@ KERNEL_FQ void m21000_sxx (KERN_ATTR_VECTOR ()) sha512_final_vector (&ctx0); - sha512_ctx_t ctx; + sha512_ctx_vector_t ctx; sha512_init_vector (&ctx); From ca0ef1b70eeea84e504a93ed71589e73d3bf1e1f Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 31 Jul 2019 15:30:39 +0200 Subject: [PATCH 5/5] update hash-name --- src/modules/module_21000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/module_21000.c b/src/modules/module_21000.c index f3a631193..41e32c006 100644 --- a/src/modules/module_21000.c +++ b/src/modules/module_21000.c @@ -17,7 +17,7 @@ static const u32 DGST_POS2 = 6; static const u32 DGST_POS3 = 7; static const u32 DGST_SIZE = DGST_SIZE_8_8; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH; -static const char *HASH_NAME = "BitShares v0.x - sha512(sha512(pass))"; +static const char *HASH_NAME = "BitShares v0.x - sha512(sha512_bin(pass))"; static const u64 KERN_TYPE = 21000; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT