From 0a5575cde5de93f988208fca238bca3059cffc46 Mon Sep 17 00:00:00 2001 From: tweqx Date: Sat, 21 May 2022 15:07:51 +0200 Subject: [PATCH 1/5] Add BLAKE2b-512($salt.$pass) and BLAKE2b-512($pass.$salt) --- OpenCL/inc_hash_blake2b.cl | 30 ++ OpenCL/inc_hash_blake2b.h | 1 + OpenCL/m00610_a0-optimized.cl | 360 +++++++++++++++++++++ OpenCL/m00610_a0-pure.cl | 133 ++++++++ OpenCL/m00610_a1-optimized.cl | 478 +++++++++++++++++++++++++++ OpenCL/m00610_a1-pure.cl | 131 ++++++++ OpenCL/m00610_a3-optimized.cl | 534 ++++++++++++++++++++++++++++++ OpenCL/m00610_a3-pure.cl | 158 +++++++++ OpenCL/m00620_a0-optimized.cl | 316 ++++++++++++++++++ OpenCL/m00620_a0-pure.cl | 121 +++++++ OpenCL/m00620_a1-optimized.cl | 434 +++++++++++++++++++++++++ OpenCL/m00620_a1-pure.cl | 114 +++++++ OpenCL/m00620_a3-optimized.cl | 590 ++++++++++++++++++++++++++++++++++ OpenCL/m00620_a3-pure.cl | 148 +++++++++ docs/changes.txt | 2 + docs/readme.txt | 2 + src/modules/module_00610.c | 221 +++++++++++++ src/modules/module_00620.c | 221 +++++++++++++ tools/test_modules/m00610.pm | 44 +++ tools/test_modules/m00620.pm | 44 +++ 20 files changed, 4082 insertions(+) create mode 100644 OpenCL/m00610_a0-optimized.cl create mode 100644 OpenCL/m00610_a0-pure.cl create mode 100644 OpenCL/m00610_a1-optimized.cl create mode 100644 OpenCL/m00610_a1-pure.cl create mode 100644 OpenCL/m00610_a3-optimized.cl create mode 100644 OpenCL/m00610_a3-pure.cl create mode 100644 OpenCL/m00620_a0-optimized.cl create mode 100644 OpenCL/m00620_a0-pure.cl create mode 100644 OpenCL/m00620_a1-optimized.cl create mode 100644 OpenCL/m00620_a1-pure.cl create mode 100644 OpenCL/m00620_a3-optimized.cl create mode 100644 OpenCL/m00620_a3-pure.cl create mode 100644 src/modules/module_00610.c create mode 100644 src/modules/module_00620.c create mode 100644 tools/test_modules/m00610.pm create mode 100644 tools/test_modules/m00620.pm diff --git a/OpenCL/inc_hash_blake2b.cl b/OpenCL/inc_hash_blake2b.cl index 9df986d1c..6b3bbe950 100644 --- 
a/OpenCL/inc_hash_blake2b.cl +++ b/OpenCL/inc_hash_blake2b.cl @@ -660,6 +660,36 @@ DECLSPEC void blake2b_init_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx) ctx->len = 0; } +DECLSPEC void blake2b_init_vector_from_scalar(PRIVATE_AS blake2b_ctx_vector_t* ctx, PRIVATE_AS blake2b_ctx_t* ctx0) { /* copies h[0..7], m[0..15] and len from the scalar context ctx0 into the vector context ctx; signature matches the prototype in inc_hash_blake2b.h */ + ctx->h[0] = ctx0->h[0]; + ctx->h[1] = ctx0->h[1]; + ctx->h[2] = ctx0->h[2]; + ctx->h[3] = ctx0->h[3]; + ctx->h[4] = ctx0->h[4]; + ctx->h[5] = ctx0->h[5]; + ctx->h[6] = ctx0->h[6]; + ctx->h[7] = ctx0->h[7]; + + ctx->m[ 0] = ctx0->m[ 0]; + ctx->m[ 1] = ctx0->m[ 1]; + ctx->m[ 2] = ctx0->m[ 2]; + ctx->m[ 3] = ctx0->m[ 3]; + ctx->m[ 4] = ctx0->m[ 4]; + ctx->m[ 5] = ctx0->m[ 5]; + ctx->m[ 6] = ctx0->m[ 6]; + ctx->m[ 7] = ctx0->m[ 7]; + ctx->m[ 8] = ctx0->m[ 8]; + ctx->m[ 9] = ctx0->m[ 9]; + ctx->m[10] = ctx0->m[10]; + ctx->m[11] = ctx0->m[11]; + ctx->m[12] = ctx0->m[12]; + ctx->m[13] = ctx0->m[13]; + ctx->m[14] = ctx0->m[14]; + ctx->m[15] = ctx0->m[15]; + + ctx->len = ctx0->len; +} + DECLSPEC void blake2b_update_vector_128 (PRIVATE_AS blake2b_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const int len) { if (len == 0) return; diff --git a/OpenCL/inc_hash_blake2b.h b/OpenCL/inc_hash_blake2b.h index b7d6e134c..d4cfb96d4 100644 --- a/OpenCL/inc_hash_blake2b.h +++ b/OpenCL/inc_hash_blake2b.h @@ -92,6 +92,7 @@ DECLSPEC void blake2b_final (PRIVATE_AS blake2b_ctx_t *ctx); DECLSPEC void blake2b_transform_vector (PRIVATE_AS u64x *h, PRIVATE_AS const u64x *m, const u32x len, const u64 f0); DECLSPEC void blake2b_init_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx); +DECLSPEC void blake2b_init_vector_from_scalar(PRIVATE_AS blake2b_ctx_vector_t* ctx, PRIVATE_AS blake2b_ctx_t* ctx0); DECLSPEC void blake2b_update_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len); DECLSPEC void blake2b_final_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx); diff --git 
a/OpenCL/m00610_a0-optimized.cl b/OpenCL/m00610_a0-optimized.cl new file mode 100644 index 000000000..01b29a159 --- /dev/null +++ b/OpenCL/m00610_a0-optimized.cl @@ -0,0 +1,360 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp_optimized.h) +#include M2S(INCLUDE_PATH/inc_rp_optimized.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00610_m04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = 
salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = 
BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00610_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00610_s04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + /** + * salt + */ + + const u32 pw_len = pws[gid].pw_len & 63; + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { 
+ digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = 
BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00610_s16 (KERN_ATTR_RULES ()) +{ +} + diff --git a/OpenCL/m00610_a0-pure.cl b/OpenCL/m00610_a0-pure.cl new file mode 100644 index 000000000..f3d98ff9e --- /dev/null +++ b/OpenCL/m00610_a0-pure.cl @@ -0,0 +1,133 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp.h) +#include M2S(INCLUDE_PATH/inc_rp.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00610_mxx (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + blake2b_ctx_t ctx; + blake2b_init (&ctx); + + blake2b_update (&ctx, tmp.i, tmp.pw_len); + blake2b_update (&ctx, s, salt_len); + + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_M_SCALAR 
(r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_sxx (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + blake2b_ctx_t ctx; + blake2b_init (&ctx); + + blake2b_update (&ctx, tmp.i, tmp.pw_len); + blake2b_update (&ctx, s, salt_len); + + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00610_a1-optimized.cl b/OpenCL/m00610_a1-optimized.cl new file mode 100644 index 000000000..e50f6955c --- /dev/null +++ b/OpenCL/m00610_a1-optimized.cl @@ -0,0 +1,478 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00610_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 
pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x 
wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; 
+ w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00610_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00610_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 
3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt 
(combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; /* each shifted salt word is OR'd into the message word with the same index */ + w3[3] |= s3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], 
w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00610_s16 (KERN_ATTR_BASIC ()) +{ +} + diff --git a/OpenCL/m00610_a1-pure.cl b/OpenCL/m00610_a1-pure.cl new file mode 100644 index 000000000..9eeabb060 --- /dev/null +++ b/OpenCL/m00610_a1-pure.cl @@ -0,0 +1,131 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00610_mxx (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx]; + } + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + blake2b_ctx_t 
ctx = ctx0; + + blake2b_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + blake2b_update (&ctx, s, salt_len); + + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_sxx (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx]; + } + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + blake2b_ctx_t ctx = ctx0; + + blake2b_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + blake2b_update (&ctx, s, salt_len); + + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} + diff --git a/OpenCL/m00610_a3-optimized.cl b/OpenCL/m00610_a3-optimized.cl new file mode 100644 index 000000000..7a406b40e --- /dev/null +++ b/OpenCL/m00610_a3-optimized.cl @@ -0,0 +1,534 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include 
M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ()) +{ + /** + * modifiers are taken from args + */ + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + const u32x w0x = w0l | w0r; + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = w0x; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + 
w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m00610s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ()) +{ + /** + * modifiers are taken from args + */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = 
salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + const u32x w0x = w0l | w0r; + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = w0x; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = 
hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00610m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00610_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = 
get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00610m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00610_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00610m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00610_s04 (KERN_ATTR_VECTOR ()) +{ + /** 
+ * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00610s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00610_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00610s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00610_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 lid = get_local_id 
(0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00610s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + diff --git a/OpenCL/m00610_a3-pure.cl b/OpenCL/m00610_a3-pure.cl new file mode 100644 index 000000000..d61b6f847 --- /dev/null +++ b/OpenCL/m00610_a3-pure.cl @@ -0,0 +1,158 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00610_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 
0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + blake2b_ctx_vector_t ctx; + + blake2b_init_vector (&ctx); + + blake2b_update_vector (&ctx, w, pw_len); + + blake2b_update_vector (&ctx, s, salt_len); + + blake2b_final_vector (&ctx); + + const u32x r0 = h32_from_64 (ctx.h[0]); + const u32x r1 = l32_from_64 (ctx.h[0]); + const u32x r2 = h32_from_64 (ctx.h[1]); + const u32x r3 = l32_from_64 (ctx.h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00610_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + blake2b_ctx_vector_t ctx; + + blake2b_init_vector (&ctx); + + blake2b_update_vector (&ctx, w, pw_len); + + blake2b_update_vector (&ctx, s, salt_len); + + blake2b_final_vector (&ctx); + + const u32x r0 = h32_from_64 (ctx.h[0]); + const u32x r1 = l32_from_64 (ctx.h[0]); 
+ const u32x r2 = h32_from_64 (ctx.h[1]); + const u32x r3 = l32_from_64 (ctx.h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + diff --git a/OpenCL/m00620_a0-optimized.cl b/OpenCL/m00620_a0-optimized.cl new file mode 100644 index 000000000..29144dc4a --- /dev/null +++ b/OpenCL/m00620_a0-optimized.cl @@ -0,0 +1,316 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp_optimized.h) +#include M2S(INCLUDE_PATH/inc_rp_optimized.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00620_m04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + 
salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len + salt_len; + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, out_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = 
h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00620_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00620_s04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + /** + * salt + */ + + const u32 pw_len = pws[gid].pw_len & 63; + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len + salt_len; + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * blake2b + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, out_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00620_s16 (KERN_ATTR_RULES ()) +{ +} + diff --git a/OpenCL/m00620_a0-pure.cl b/OpenCL/m00620_a0-pure.cl new file 
mode 100644 index 000000000..bcc056611 --- /dev/null +++ b/OpenCL/m00620_a0-pure.cl @@ -0,0 +1,121 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp.h) +#include M2S(INCLUDE_PATH/inc_rp.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00620_mxx (KERN_ATTR_RULES ()) +{ + /** + * work-item id; ids at or past GID_CNT return without hashing + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * hash the salt once into ctx0; each candidate below resumes from a copy of it + */ + + COPY_PW (pws[gid]); + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + /** + * loop: apply rule il_pos to the word, feed it in after the salt, finalize + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + blake2b_ctx_t ctx = ctx0; + + blake2b_update (&ctx, tmp.i, tmp.pw_len); + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_sxx (KERN_ATTR_RULES ()) +{ + /** + * work-item id; ids at or past GID_CNT return without hashing + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * hash the salt once into ctx0; each candidate below resumes from a copy of it + */ + + COPY_PW (pws[gid]); + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, salt_bufs[SALT_POS_HOST].salt_buf, 
salt_bufs[SALT_POS_HOST].salt_len); + + /** + * loop: apply rule il_pos to the word, feed it in after the salt, finalize + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + blake2b_ctx_t ctx = ctx0; + + blake2b_update (&ctx, tmp.i, tmp.pw_len); + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00620_a1-optimized.cl b/OpenCL/m00620_a1-optimized.cl new file mode 100644 index 000000000..fa50e5d89 --- /dev/null +++ b/OpenCL/m00620_a1-optimized.cl @@ -0,0 +1,434 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00620_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base: work-item id, then the pws[gid] word (first 8 u32) and its length & 63 + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt: first 16 u32 of the salt buffer, plus its length + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = 
salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + 
u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * blake2b: one final block; 0x01010040 = fanout 1, depth 1, no key, 64-byte digest + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 
(h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00620_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00620_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base: work-item id, then the pws[gid] word (first 8 u32) and its length & 63 + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt: first 16 u32 of the salt buffer, plus its length + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * loop + 
*/ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + 
const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * blake2b: one final block; 0x01010040 = fanout 1, depth 1, no key, 64-byte digest + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00620_s16 (KERN_ATTR_BASIC ()) +{ +} + diff --git a/OpenCL/m00620_a1-pure.cl b/OpenCL/m00620_a1-pure.cl new file mode 100644 index 000000000..70ffccc8c --- /dev/null +++ b/OpenCL/m00620_a1-pure.cl @@ -0,0 +1,114 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) 
+#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00620_mxx (KERN_ATTR_BASIC ()) +{ + /** + * base: gid selects the pws entry; ids at or past GID_CNT return early + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * base: absorb the salt, then pws[gid], into ctx0 once, before the loop + */ + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + blake2b_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop: per combs_buf entry: copy ctx0, append the entry, finalize, compare the 32-bit halves of h[0] and h[1] + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + blake2b_ctx_t ctx = ctx0; + + blake2b_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_sxx (KERN_ATTR_BASIC ()) +{ + /** + * base: gid selects the pws entry; ids at or past GID_CNT return early + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * digest: target words picked by DGST_R0..DGST_R3; not referenced by name below, presumably read by COMPARE_S_SCALAR (macro not visible here) + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base: absorb the salt, then pws[gid], into ctx0 once, before the loop + */ + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + blake2b_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop: per combs_buf entry: copy ctx0, append the entry, finalize, compare the 32-bit halves of h[0] and h[1] + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + blake2b_ctx_t ctx = ctx0; + + blake2b_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + blake2b_final (&ctx); + + const u32 r0 = h32_from_64_S (ctx.h[0]); + const u32 r1 = l32_from_64_S (ctx.h[0]); + const u32 r2 = h32_from_64_S (ctx.h[1]); + const u32 r3 = l32_from_64_S (ctx.h[1]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} + +diff --git 
a/OpenCL/m00620_a3-optimized.cl b/OpenCL/m00620_a3-optimized.cl new file mode 100644 index 000000000..64cb352c5 --- /dev/null +++ b/OpenCL/m00620_a3-optimized.cl @@ -0,0 +1,590 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +DECLSPEC void m00620m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_BASIC ()) +{ + /** + * modifiers are taken from args + */ + + /** + * salt: copy salt_buf words 0..15 into four 4-word groups + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop: OR each bfs_buf candidate into password word 0 + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x 
w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + /** + * blake2b: t0..t3 become m[0..7], m[8..15] are zero; one transform flagged BLAKE2B_FINAL + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (t0[1], t0[0]); + m[ 1] = hl32_to_64 (t0[3], t0[2]); + m[ 2] = hl32_to_64 (t1[1], t1[0]); + m[ 3] = hl32_to_64 (t1[3], t1[2]); + m[ 4] = hl32_to_64 (t2[1], t2[0]); + m[ 5] = hl32_to_64 (t2[3], t2[2]); + m[ 6] = hl32_to_64 (t3[1], t3[0]); + m[ 7] = hl32_to_64 (t3[3], t3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; /* RFC 7693 parameter word: depth 1, fanout 1, no key, 64-byte digest */ + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m00620s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_BASIC ()) +{ + /** + * modifiers are taken from args + */ + + /** + 
* digest: target words picked by DGST_R0..DGST_R3; not referenced by name below, presumably read by COMPARE_S_SIMD (macro not visible here) + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * salt: copy salt_buf words 0..15 into four 4-word groups + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop: OR each bfs_buf candidate into password word 0 + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; 
+ t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + /** + * blake2b: t0..t3 become m[0..7], m[8..15] are zero; one transform flagged BLAKE2B_FINAL + */ + + u64x m[16]; + + m[ 0] = hl32_to_64 (t0[1], t0[0]); + m[ 1] = hl32_to_64 (t0[3], t0[2]); + m[ 2] = hl32_to_64 (t1[1], t1[0]); + m[ 3] = hl32_to_64 (t1[3], t1[2]); + m[ 4] = hl32_to_64 (t2[1], t2[0]); + m[ 5] = hl32_to_64 (t2[3], t2[2]); + m[ 6] = hl32_to_64 (t3[1], t3[0]); + m[ 7] = hl32_to_64 (t3[3], t3[2]); + m[ 8] = 0; + m[ 9] = 0; + m[10] = 0; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0; + + u64x h[8]; + + h[0] = BLAKE2B_IV_00 ^ 0x01010040; /* RFC 7693 parameter word: depth 1, fanout 1, no key, 64-byte digest */ + h[1] = BLAKE2B_IV_01; + h[2] = BLAKE2B_IV_02; + h[3] = BLAKE2B_IV_03; + h[4] = BLAKE2B_IV_04; + h[5] = BLAKE2B_IV_05; + h[6] = BLAKE2B_IV_06; + h[7] = BLAKE2B_IV_07; + + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + + const u32x r0 = h32_from_64 (h[0]); + const u32x r1 = l32_from_64 (h[0]); + const u32x r2 = h32_from_64 (h[1]); + const u32x r3 = l32_from_64 (h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base: password words 0..3 from pws[gid].i, words 4..15 zeroed; pw_len masked with 63 + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00620m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00620_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base: password words 0..7 from pws[gid].i, words 8..15 zeroed; pw_len masked with 63 + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00620m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00620_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base: password words 0..15 from pws[gid].i; pw_len masked with 63 + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; 
+ w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00620m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00620_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base: password words 0..3 from pws[gid].i, words 4..15 zeroed; pw_len masked with 63 + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00620s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00620_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base: password words 0..7 from pws[gid].i, words 8..15 zeroed; pw_len masked with 63 + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = 
pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00620s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +KERNEL_FQ void m00620_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base: password words 0..15 from pws[gid].i; pw_len masked with 63 + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00620s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); +} + +diff --git a/OpenCL/m00620_a3-pure.cl b/OpenCL/m00620_a3-pure.cl new file mode 100644 index 000000000..2b34cd3ba --- /dev/null +++ b/OpenCL/m00620_a3-pure.cl @@ -0,0 +1,148 @@ +/** + * Author......: See 
docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) +#endif + +KERNEL_FQ void m00620_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * base: copy ceil(pw_len / 4) words of pws[gid].i into w; absorb the salt into the scalar ctx0 + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + /** + * loop: OR words_buf_r[il_pos / VECT_SIZE] into w[0], seed a vector ctx from ctx0, hash w, compare + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + blake2b_ctx_vector_t ctx; + + blake2b_init_vector_from_scalar (&ctx, &ctx0); + + blake2b_update_vector (&ctx, w, pw_len); + + blake2b_final_vector (&ctx); + + const u32x r0 = h32_from_64 (ctx.h[0]); + const u32x r1 = l32_from_64 (ctx.h[0]); + const u32x r2 = h32_from_64 (ctx.h[1]); + const u32x r3 = l32_from_64 (ctx.h[1]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00620_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + /** + * digest: target words picked by DGST_R0..DGST_R3; not referenced by name below, presumably read by COMPARE_S_SIMD (macro not visible here) + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base: copy ceil(pw_len / 4) words of pws[gid].i into w; absorb the salt into the scalar ctx0 + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + 
{ + w[idx] = pws[gid].i[idx]; + } + + blake2b_ctx_t ctx0; + + blake2b_init (&ctx0); + + blake2b_update_global (&ctx0, salt_bufs[SALT_POS_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + /** + * loop: OR words_buf_r[il_pos / VECT_SIZE] into w[0], seed a vector ctx from ctx0, hash w, compare + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + blake2b_ctx_vector_t ctx; + + blake2b_init_vector_from_scalar (&ctx, &ctx0); + + blake2b_update_vector (&ctx, w, pw_len); + + blake2b_final_vector (&ctx); + + const u32x r0 = h32_from_64 (ctx.h[0]); + const u32x r1 = l32_from_64 (ctx.h[0]); + const u32x r2 = h32_from_64 (ctx.h[1]); + const u32x r3 = l32_from_64 (ctx.h[1]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +diff --git a/docs/changes.txt b/docs/changes.txt index 383845c34..d6e4b7448 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -4,6 +4,8 @@ ## Algorithms ## +- Added hash-mode: BLAKE2b-512($salt.$pass) +- Added hash-mode: BLAKE2b-512($pass.$salt) - Added hash-mode: Amazon AWS4-HMAC-SHA256 - Added hash-mode: DPAPI masterkey file v1 (context 3) - Added hash-mode: DPAPI masterkey file v2 (context 3) diff --git a/docs/readme.txt b/docs/readme.txt index 6cee08f1a..ec40b8ad9 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -124,6 +124,8 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - sha512($salt.$pass) - sha512($salt.utf16le($pass)) - sha512(utf16le($pass).$salt) +- BLAKE2b-512($pass.$salt) +- BLAKE2b-512($salt.$pass) - HMAC-MD5 (key = $pass) - HMAC-MD5 (key = $salt) - HMAC-SHA1 (key = $pass) diff --git a/src/modules/module_00610.c b/src/modules/module_00610.c new file mode 100644 index 000000000..6dd11a126 --- /dev/null +++ b/src/modules/module_00610.c @@ -0,0 +1,221 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include 
"shared.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL; +static const u32 DGST_POS0 = 1; +static const u32 DGST_POS1 = 0; +static const u32 DGST_POS2 = 3; +static const u32 DGST_POS3 = 2; +static const u32 DGST_SIZE = DGST_SIZE_8_8; +static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_SALTED; +static const char *HASH_NAME = "BLAKE2b-512($pass.$salt)"; +static const u64 KERN_TYPE = 610; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_APPENDED_SALT + | OPTI_TYPE_USES_BITS_64 + | OPTI_TYPE_RAW_HASH; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; +static const u32 SALT_TYPE = SALT_TYPE_GENERIC; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "$BLAKE2$41fcd44c789c735c08b43a871b81c8f617ca43918d38aee6cf8291c58a0b00a03115857425e5ff6f044be7a5bec8536b52d6c9992e21cd43cdca8a55bbf1f5c1:1033"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED 
const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +static const char *SIGNATURE_BLAKE2B = "$BLAKE2$"; + /* module_hash_decode: parse signature + 128 hex digits + separator + salt from line_buf into digest[0..7] and salt; returns the tokenizer's code on failure, PARSER_SALT_LENGTH if the salt does not decode, else PARSER_OK */ +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, 
MAYBE_UNUSED const int line_len) +{ + u64 *digest = (u64 *) digest_buf; + + hc_token_t token; + /* NOTE(review): token has no initializer; only the fields assigned below are set - confirm input_tokenizer reads no others */ + token.token_cnt = 3; + + // signature + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_BLAKE2B; + + token.len[0] = 8; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + // hash + token.sep[1] = hashconfig->separator; + token.len_min[1] = 128; + token.len_max[1] = 128; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + // salt + token.len_min[2] = SALT_MIN; + token.len_max[2] = SALT_MAX; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH; + + if (hashconfig->opts_type & OPTS_TYPE_ST_HEX) { + token.len_min[2] *= 2; + token.len_max[2] *= 2; + + token.attr[2] |= TOKEN_ATTR_VERIFY_HEX; + } + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + const u8 *hash_pos = token.buf[1]; + + digest[0] = hex_to_u64 (hash_pos + 0); + digest[1] = hex_to_u64 (hash_pos + 16); + digest[2] = hex_to_u64 (hash_pos + 32); + digest[3] = hex_to_u64 (hash_pos + 48); + digest[4] = hex_to_u64 (hash_pos + 64); + digest[5] = hex_to_u64 (hash_pos + 80); + digest[6] = hex_to_u64 (hash_pos + 96); + digest[7] = hex_to_u64 (hash_pos + 112); + + // process salt + + const u8 *salt_pos = token.buf[2]; + const int salt_len = token.len[2]; + + const bool parse_rc = generic_salt_decode (hashconfig, salt_pos, salt_len, (u8 *) salt->salt_buf, (int *) &salt->salt_len); + + if (parse_rc == false) return (PARSER_SALT_LENGTH); + + return (PARSER_OK); +} + /* module_hash_encode: write signature, the eight digest words as hex, the separator and the encoded salt into line_buf; returns the number of bytes written */ +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u64 *digest = (const u64 *) digest_buf; + + // we can not change anything in the original buffer, 
otherwise destroying sorting + // so digest_buf is only read here; out_buf aliases line_buf (no local copy is made) + + u8 *out_buf = (u8 *) line_buf; + + int out_len = strlen (SIGNATURE_BLAKE2B); + + // signature + memcpy (out_buf, SIGNATURE_BLAKE2B, out_len); + + // hash + u64_to_hex (digest[0], out_buf + out_len); out_len += 16; + u64_to_hex (digest[1], out_buf + out_len); out_len += 16; + u64_to_hex (digest[2], out_buf + out_len); out_len += 16; + u64_to_hex (digest[3], out_buf + out_len); out_len += 16; + u64_to_hex (digest[4], out_buf + out_len); out_len += 16; + u64_to_hex (digest[5], out_buf + out_len); out_len += 16; + u64_to_hex (digest[6], out_buf + out_len); out_len += 16; + u64_to_hex (digest[7], out_buf + out_len); out_len += 16; + + // separator between hash and salt + out_buf[out_len] = hashconfig->separator; + out_len += 1; + + // salt + out_len += generic_salt_encode (hashconfig, (const u8 *) salt->salt_buf, (const int) salt->salt_len, out_buf + out_len); + + return out_len; +} + /* module_init: fill module_ctx; entries this module does not implement are set to MODULE_DEFAULT */ +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deprecated_notice = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = MODULE_DEFAULT; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + 
module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = 
module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = MODULE_DEFAULT; + module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = MODULE_DEFAULT; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/src/modules/module_00620.c b/src/modules/module_00620.c new file mode 100644 index 000000000..52bffc26e --- /dev/null +++ b/src/modules/module_00620.c @@ -0,0 +1,221 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL; +static const u32 DGST_POS0 = 1; +static const u32 DGST_POS1 = 0; +static const u32 DGST_POS2 = 3; +static const u32 DGST_POS3 = 2; +static const u32 DGST_SIZE = DGST_SIZE_8_8; +static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_SALTED; +static const char *HASH_NAME = "BLAKE2b-512($salt.$pass)"; +static const u64 KERN_TYPE = 620; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PREPENDED_SALT + | OPTI_TYPE_USES_BITS_64 + | OPTI_TYPE_RAW_HASH; +static const u64 
OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; +static const u32 SALT_TYPE = SALT_TYPE_GENERIC; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "$BLAKE2$f0325fdfc3f82a014935442f7adbc069d4636d67276a85b09f8de368f122cf5195a0b780d7fee709fbf1dcd02ddcb581df84508cf1fb0f3393af1be0565491c6:3301"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, 
MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +static const char *SIGNATURE_BLAKE2B = "$BLAKE2$"; + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u64 *digest = (u64 *) digest_buf; + + hc_token_t token; + + token.token_cnt = 3; + + // signature + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_BLAKE2B; + + token.len[0] = 8; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + // hash + token.sep[1] = hashconfig->separator; + token.len_min[1] = 128; + token.len_max[1] = 128; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + // salt + token.len_min[2] = SALT_MIN; + token.len_max[2] = 
SALT_MAX; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH; + + if (hashconfig->opts_type & OPTS_TYPE_ST_HEX) { + token.len_min[2] *= 2; + token.len_max[2] *= 2; + + token.attr[2] |= TOKEN_ATTR_VERIFY_HEX; + } + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + const u8 *hash_pos = token.buf[1]; + + digest[0] = hex_to_u64 (hash_pos + 0); + digest[1] = hex_to_u64 (hash_pos + 16); + digest[2] = hex_to_u64 (hash_pos + 32); + digest[3] = hex_to_u64 (hash_pos + 48); + digest[4] = hex_to_u64 (hash_pos + 64); + digest[5] = hex_to_u64 (hash_pos + 80); + digest[6] = hex_to_u64 (hash_pos + 96); + digest[7] = hex_to_u64 (hash_pos + 112); + + // process salt + + const u8 *salt_pos = token.buf[2]; + const int salt_len = token.len[2]; + + const bool parse_rc = generic_salt_decode (hashconfig, salt_pos, salt_len, (u8 *) salt->salt_buf, (int *) &salt->salt_len); + + if (parse_rc == false) return (PARSER_SALT_LENGTH); + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u64 *digest = (const u64 *) digest_buf; + + // we can not change anything in the original buffer, otherwise destroying sorting + // therefore create some local buffer + + u8 *out_buf = (u8 *) line_buf; + + int out_len = strlen (SIGNATURE_BLAKE2B); + + // signature + memcpy (out_buf, SIGNATURE_BLAKE2B, out_len); + + // hash + u64_to_hex (digest[0], out_buf + out_len); out_len += 16; + u64_to_hex (digest[1], out_buf + out_len); out_len += 16; + u64_to_hex (digest[2], out_buf + out_len); out_len += 16; + u64_to_hex (digest[3], out_buf + out_len); out_len += 16; + u64_to_hex (digest[4], out_buf + out_len); out_len += 16; + u64_to_hex 
(digest[5], out_buf + out_len); out_len += 16; + u64_to_hex (digest[6], out_buf + out_len); out_len += 16; + u64_to_hex (digest[7], out_buf + out_len); out_len += 16; + + // : + out_buf[out_len] = hashconfig->separator; + out_len += 1; + + // salt + out_len += generic_salt_encode (hashconfig, (const u8 *) salt->salt_buf, (const int) salt->salt_len, out_buf + out_len); + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deprecated_notice = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = MODULE_DEFAULT; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + 
module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = 
MODULE_DEFAULT; + module_ctx->module_pw_max = MODULE_DEFAULT; + module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = MODULE_DEFAULT; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/tools/test_modules/m00610.pm b/tools/test_modules/m00610.pm new file mode 100644 index 000000000..b8f5a224d --- /dev/null +++ b/tools/test_modules/m00610.pm @@ -0,0 +1,44 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +use strict; +use warnings; + +use Digest::BLAKE2 qw (blake2b_hex); + +sub module_constraints { [[0, 256], [0, 256], [0, 64], [0, 64], [0, 64]] } + +sub module_generate_hash +{ + my $word = shift; + my $salt = shift; + + my $digest = blake2b_hex ($word . 
$salt); + + my $hash = sprintf ("\$BLAKE2\$%s:%s", $digest, $salt); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my ($hash, $salt, $word) = split (':', $line); + + return unless defined $hash; + return unless defined $salt; + return unless defined $word; + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed, $salt); + + return ($new_hash, $word); +} + +1; diff --git a/tools/test_modules/m00620.pm b/tools/test_modules/m00620.pm new file mode 100644 index 000000000..b3a9cba65 --- /dev/null +++ b/tools/test_modules/m00620.pm @@ -0,0 +1,44 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +use strict; +use warnings; + +use Digest::BLAKE2 qw (blake2b_hex); + +sub module_constraints { [[0, 256], [0, 256], [0, 64], [0, 64], [0, 64]] } + +sub module_generate_hash +{ + my $word = shift; + my $salt = shift; + + my $digest = blake2b_hex ($salt . $word); + + my $hash = sprintf ("\$BLAKE2\$%s:%s", $digest, $salt); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my ($hash, $salt, $word) = split (':', $line); + + return unless defined $hash; + return unless defined $salt; + return unless defined $word; + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed, $salt); + + return ($new_hash, $word); +} + +1; From 9ce30defcbba758382460bd01e7ce1821e42264b Mon Sep 17 00:00:00 2001 From: tweqx Date: Sat, 21 May 2022 19:32:39 +0200 Subject: [PATCH 2/5] Don't apply the salt in the a3 BLAKE2b($pass.$salt) optimized OpenCL code --- OpenCL/m00610_a3-optimized.cl | 179 ++++------------------------------ 1 file changed, 18 insertions(+), 161 deletions(-) diff --git a/OpenCL/m00610_a3-optimized.cl b/OpenCL/m00610_a3-optimized.cl index 7a406b40e..7402791ef 100644 --- a/OpenCL/m00610_a3-optimized.cl +++ b/OpenCL/m00610_a3-optimized.cl @@ -20,38 +20,6 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, 
const u32 pw_len, KERN_ATTR_FUNC_VECTO * modifiers are taken from args */ - /** - * salt - */ - - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - u32 salt_buf3[4]; - - salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; - salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; - salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; - salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; - salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; - salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; - - const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - /** * loop */ @@ -63,59 +31,20 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = w0x; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; - w3[0] = w[12]; - w3[1] = w[13]; - w3[2] = w[14]; - w3[3] = w[15]; - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - 
w2[0] |= salt_buf2[0]; - w2[1] |= salt_buf2[1]; - w2[2] |= salt_buf2[2]; - w2[3] |= salt_buf2[3]; - w3[0] |= salt_buf3[0]; - w3[1] |= salt_buf3[1]; - w3[2] |= salt_buf3[2]; - w3[3] |= salt_buf3[3]; - /** * blake2b */ u64x m[16]; - m[ 0] = hl32_to_64 (w0[1], w0[0]); - m[ 1] = hl32_to_64 (w0[3], w0[2]); - m[ 2] = hl32_to_64 (w1[1], w1[0]); - m[ 3] = hl32_to_64 (w1[3], w1[2]); - m[ 4] = hl32_to_64 (w2[1], w2[0]); - m[ 5] = hl32_to_64 (w2[3], w2[2]); - m[ 6] = hl32_to_64 (w3[1], w3[0]); - m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 0] = hl32_to_64 (w[ 1], w0x ); + m[ 1] = hl32_to_64 (w[ 3], w[ 2]); + m[ 2] = hl32_to_64 (w[ 5], w[ 4]); + m[ 3] = hl32_to_64 (w[ 7], w[ 6]); + m[ 4] = hl32_to_64 (w[ 9], w[ 8]); + m[ 5] = hl32_to_64 (w[11], w[10]); + m[ 6] = hl32_to_64 (w[13], w[12]); + m[ 7] = hl32_to_64 (w[15], w[14]); m[ 8] = 0; m[ 9] = 0; m[10] = 0; @@ -136,7 +65,7 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO h[6] = BLAKE2B_IV_06; h[7] = BLAKE2B_IV_07; - blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + blake2b_transform_vector (h, m, pw_len, BLAKE2B_FINAL); const u32x r0 = h32_from_64 (h[0]); const u32x r1 = l32_from_64 (h[0]); @@ -165,38 +94,6 @@ DECLSPEC void m00610s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] }; - /** - * salt - */ - - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - u32 salt_buf3[4]; - - salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; - salt_buf2[1] = 
salt_bufs[SALT_POS_HOST].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; - salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; - salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; - salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; - salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; - salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; - - const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - /** * loop */ @@ -208,59 +105,20 @@ DECLSPEC void m00610s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = w0x; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; - w3[0] = w[12]; - w3[1] = w[13]; - w3[2] = w[14]; - w3[3] = w[15]; - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - w2[0] |= salt_buf2[0]; - w2[1] |= salt_buf2[1]; - w2[2] |= salt_buf2[2]; - w2[3] |= salt_buf2[3]; - w3[0] |= salt_buf3[0]; - w3[1] |= salt_buf3[1]; - w3[2] |= salt_buf3[2]; - w3[3] |= salt_buf3[3]; - /** * blake2b */ u64x m[16]; - m[ 0] = hl32_to_64 (w0[1], w0[0]); - m[ 1] = hl32_to_64 (w0[3], w0[2]); - m[ 2] = hl32_to_64 (w1[1], w1[0]); - m[ 3] = hl32_to_64 (w1[3], w1[2]); - m[ 4] = hl32_to_64 (w2[1], w2[0]); - m[ 5] = hl32_to_64 (w2[3], w2[2]); - m[ 6] = hl32_to_64 (w3[1], w3[0]); - m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 0] = hl32_to_64 (w[ 1], w0x ); + m[ 1] = hl32_to_64 (w[ 3], w[ 2]); + m[ 2] = hl32_to_64 (w[ 5], w[ 4]); + m[ 3] = hl32_to_64 (w[ 7], w[ 6]); + m[ 4] = hl32_to_64 (w[ 9], 
w[ 8]); + m[ 5] = hl32_to_64 (w[11], w[10]); + m[ 6] = hl32_to_64 (w[13], w[12]); + m[ 7] = hl32_to_64 (w[15], w[14]); m[ 8] = 0; m[ 9] = 0; m[10] = 0; @@ -281,7 +139,7 @@ DECLSPEC void m00610s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO h[6] = BLAKE2B_IV_06; h[7] = BLAKE2B_IV_07; - blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + blake2b_transform_vector (h, m, pw_len, BLAKE2B_FINAL); const u32x r0 = h32_from_64 (h[0]); const u32x r1 = l32_from_64 (h[0]); @@ -531,4 +389,3 @@ KERNEL_FQ void m00610_s16 (KERN_ATTR_VECTOR ()) m00610s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); } - From 97a119da82dc34a52b5a1ae0eef8b55e27f32170 Mon Sep 17 00:00:00 2001 From: tweqx Date: Mon, 23 May 2022 16:53:50 +0200 Subject: [PATCH 3/5] In the unit tests, convert the hexdigest to lowercase to match the behavior of 'module_hash_encode' --- tools/test_modules/m00610.pm | 2 +- tools/test_modules/m00620.pm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/test_modules/m00610.pm b/tools/test_modules/m00610.pm index b8f5a224d..61ad8e43b 100644 --- a/tools/test_modules/m00610.pm +++ b/tools/test_modules/m00610.pm @@ -19,7 +19,7 @@ sub module_generate_hash my $digest = blake2b_hex ($word . $salt); - my $hash = sprintf ("\$BLAKE2\$%s:%s", $digest, $salt); + my $hash = sprintf ("\$BLAKE2\$%s:%s", lc ($digest), $salt); return $hash; } diff --git a/tools/test_modules/m00620.pm b/tools/test_modules/m00620.pm index b3a9cba65..6c0c9e714 100644 --- a/tools/test_modules/m00620.pm +++ b/tools/test_modules/m00620.pm @@ -19,7 +19,7 @@ sub module_generate_hash my $digest = blake2b_hex ($salt . 
$word); - my $hash = sprintf ("\$BLAKE2\$%s:%s", $digest, $salt); + my $hash = sprintf ("\$BLAKE2\$%s:%s", lc ($digest), $salt); return $hash; } From ebcf5bfe20136477f2033aeeb82e9440df1d3139 Mon Sep 17 00:00:00 2001 From: tweqx Date: Wed, 25 May 2022 20:43:32 +0200 Subject: [PATCH 4/5] Partially revert 9ce30defc: apply salt in the a3 610 multi kernel --- OpenCL/m00610_a3-optimized.cl | 90 +++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 9 deletions(-) diff --git a/OpenCL/m00610_a3-optimized.cl b/OpenCL/m00610_a3-optimized.cl index 7402791ef..1ebbffb51 100644 --- a/OpenCL/m00610_a3-optimized.cl +++ b/OpenCL/m00610_a3-optimized.cl @@ -14,12 +14,45 @@ #include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) #endif + DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ()) { /** * modifiers are taken from args */ + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + 
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); + /** * loop */ @@ -31,20 +64,59 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = w0x; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + /** * blake2b */ u64x m[16]; - m[ 0] = hl32_to_64 (w[ 1], w0x ); - m[ 1] = hl32_to_64 (w[ 3], w[ 2]); - m[ 2] = hl32_to_64 (w[ 5], w[ 4]); - m[ 3] = hl32_to_64 (w[ 7], w[ 6]); - m[ 4] = hl32_to_64 (w[ 9], w[ 8]); - m[ 5] = hl32_to_64 (w[11], w[10]); - m[ 6] = hl32_to_64 (w[13], w[12]); - m[ 7] = hl32_to_64 (w[15], w[14]); + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); m[ 8] = 0; m[ 9] = 0; m[10] = 0; @@ -65,7 +137,7 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO h[6] = BLAKE2B_IV_06; h[7] = BLAKE2B_IV_07; - blake2b_transform_vector (h, m, pw_len, BLAKE2B_FINAL); + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); const u32x r0 = h32_from_64 (h[0]); const u32x r1 = l32_from_64 (h[0]); 
From 42c4c1d72faf3de0c68d23e29a5fb035746352f9 Mon Sep 17 00:00:00 2001 From: tweqx Date: Wed, 25 May 2022 22:59:27 +0200 Subject: [PATCH 5/5] In the a1 610 kernel single function (m00610_s04), correctly apply the last two salt buffer entries (s3[2], s3[3]) --- OpenCL/m00610_a1-optimized.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenCL/m00610_a1-optimized.cl b/OpenCL/m00610_a1-optimized.cl index e50f6955c..a6728bce4 100644 --- a/OpenCL/m00610_a1-optimized.cl +++ b/OpenCL/m00610_a1-optimized.cl @@ -420,8 +420,8 @@ KERNEL_FQ void m00610_s04 (KERN_ATTR_BASIC ()) w2[3] |= s2[3]; w3[0] |= s3[0]; w3[1] |= s3[1]; - w3[0] |= s3[2]; - w3[1] |= s3[3]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; /** * blake2b