From 42dfa6b543dba3a7f487a3d6996ce8af058f2dee Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 31 Mar 2021 11:05:22 +0200 Subject: [PATCH] Add optimized -m 24800 kernels --- OpenCL/m24800_a0-optimized.cl | 362 ++++++++++++++++++++ OpenCL/m24800_a0-pure.cl | 29 +- OpenCL/m24800_a1-optimized.cl | 464 ++++++++++++++++++++++++++ OpenCL/m24800_a1-pure.cl | 25 +- OpenCL/m24800_a3-optimized.cl | 612 ++++++++++++++++++++++++++++++++++ OpenCL/m24800_a3-pure.cl | 27 +- docs/changes.txt | 1 + docs/readme.txt | 1 + src/modules/module_24800.c | 16 +- tools/test_modules/m24800.pm | 13 +- 10 files changed, 1499 insertions(+), 51 deletions(-) create mode 100644 OpenCL/m24800_a0-optimized.cl create mode 100644 OpenCL/m24800_a1-optimized.cl create mode 100644 OpenCL/m24800_a3-optimized.cl diff --git a/OpenCL/m24800_a0-optimized.cl b/OpenCL/m24800_a0-optimized.cl new file mode 100644 index 000000000..9ef086f7c --- /dev/null +++ b/OpenCL/m24800_a0-optimized.cl @@ -0,0 +1,362 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] 
= SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 
il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + const u32x out_len2 = out_len * 2; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, out_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + out_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = 
pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + const u32x out_len2 = out_len * 2; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, out_len2 ^ 3); + + x3_t[2] = 0; 
+ x3_t[3] = (64 + out_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m24800_a0-pure.cl b/OpenCL/m24800_a0-pure.cl index a75b38a45..4140ed966 100644 --- a/OpenCL/m24800_a0-pure.cl +++ b/OpenCL/m24800_a0-pure.cl @@ -33,9 +33,6 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - u32 t[128] = { 0 }; - - /** * loop */ @@ -46,15 +43,18 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_RULES ()) tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); - // we need to swap the endian before we convert to unicode. + // swap endian for (u32 i = 0, idx = 0; i < tmp.pw_len; i += 4, idx += 1) { - tmp.i[idx] = hc_swap32(tmp.i[idx]); + tmp.i[idx] = hc_swap32 (tmp.i[idx]); } + u32 t[128] = { 0 }; + // make it unicode. - for(u32 i = 0, idx = 0; idx < tmp.pw_len; i += 2, idx += 1){ - make_utf16beN(&tmp.i[idx], &t[i], &t[i+1]); + for (u32 i = 0, idx = 0; idx < tmp.pw_len; i += 2, idx += 1) + { + make_utf16beN (&tmp.i[idx], &t[i], &t[i+1]); } // hash time @@ -104,8 +104,6 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - u32 t[128] = { 0 }; - /** * loop */ @@ -119,20 +117,23 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_RULES ()) // swap endian for (u32 i = 0, idx = 0; i < tmp.pw_len; i += 4, idx += 1) { - tmp.i[idx] = hc_swap32(tmp.i[idx]); + tmp.i[idx] = hc_swap32 (tmp.i[idx]); } + u32 t[128] = { 0 }; + // make it unicode. 
- for(u32 i = 0, idx = 0; idx < tmp.pw_len; i += 2, idx += 1){ - make_utf16beN(&tmp.i[idx], &t[i], &t[i+1]); + for (u32 i = 0, idx = 0; idx < tmp.pw_len; i += 2, idx += 1) + { + make_utf16beN (&tmp.i[idx], &t[i], &t[i+1]); } // hash time sha1_hmac_ctx_t ctx; - sha1_hmac_init (&ctx, t, tmp.pw_len*2); + sha1_hmac_init (&ctx, t, tmp.pw_len * 2); - sha1_hmac_update (&ctx, t, tmp.pw_len*2); + sha1_hmac_update (&ctx, t, tmp.pw_len * 2); sha1_hmac_final (&ctx); diff --git a/OpenCL/m24800_a1-optimized.cl b/OpenCL/m24800_a1-optimized.cl new file mode 100644 index 000000000..02451900c --- /dev/null +++ b/OpenCL/m24800_a1-optimized.cl @@ -0,0 +1,464 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 
0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + const u32x pw_len2 = pw_len * 2; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = 
pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + 
hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, pw_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + const u32x pw_len2 = pw_len * 2; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = 
pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] 
= w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, pw_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24800_a1-pure.cl b/OpenCL/m24800_a1-pure.cl index 89a175b5d..df4733feb 100644 --- a/OpenCL/m24800_a1-pure.cl +++ b/OpenCL/m24800_a1-pure.cl @@ -38,8 +38,6 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - u32 t[128] = { 0 }; - /** * loop */ @@ -68,16 +66,19 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_BASIC ()) c[i] |= w[i]; } + u32 t[128] = { 0 }; + // make it unicode. - for(u32 i = 0, idx = 0; idx < pw_len + comb_len; i += 2, idx += 1){ - make_utf16beN(&c[idx], &t[i], &t[i+1]); + for (u32 i = 0, idx = 0; idx < pw_len + comb_len; i += 2, idx += 1) + { + make_utf16beN (&c[idx], &t[i], &t[i+1]); } sha1_hmac_ctx_t ctx; - sha1_hmac_init (&ctx, t, (pw_len + comb_len)*2); + sha1_hmac_init (&ctx, t, (pw_len + comb_len) * 2); - sha1_hmac_update (&ctx, t, (pw_len + comb_len)*2); + sha1_hmac_update (&ctx, t, (pw_len + comb_len) * 2); sha1_hmac_final (&ctx); @@ -120,7 +121,6 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_BASIC ()) const u32 pw_len = pws[gid].pw_len; u32 w[64] = { 0 }; - u32 t[128] = { 0 }; for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) { @@ -155,16 +155,19 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_BASIC ()) c[i] |= w[i]; } + u32 t[128] = { 0 }; + // make it unicode. 
- for(u32 i = 0, idx = 0; idx < pw_len + comb_len; i += 2, idx += 1){ - make_utf16beN(&c[idx], &t[i], &t[i+1]); + for (u32 i = 0, idx = 0; idx < pw_len + comb_len; i += 2, idx += 1) + { + make_utf16beN (&c[idx], &t[i], &t[i+1]); } sha1_hmac_ctx_t ctx; - sha1_hmac_init (&ctx, t, (pw_len + comb_len)*2); + sha1_hmac_init (&ctx, t, (pw_len + comb_len) * 2); - sha1_hmac_update (&ctx, t, (pw_len + comb_len)*2); + sha1_hmac_update (&ctx, t, (pw_len + comb_len) * 2); sha1_hmac_final (&ctx); diff --git a/OpenCL/m24800_a3-optimized.cl b/OpenCL/m24800_a3-optimized.cl new file mode 100644 index 000000000..f64f8af3c --- /dev/null +++ b/OpenCL/m24800_a3-optimized.cl @@ -0,0 +1,612 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 
0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +DECLSPEC void m24800m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * pads + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + //make_utf16beN (w1_t, w2_t, w3_t); + //make_utf16beN (w0_t, w0_t, w1_t); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x 
x3_t[4]; + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + append_0x80_4x4 (x0_t, x1_t, x2_t, x3_t, pw_len ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +DECLSPEC void m24800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * pads + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + 
w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + //make_utf16beN (w1_t, w2_t, w3_t); + //make_utf16beN (w0_t, w0_t, w1_t); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + append_0x80_4x4 (x0_t, x1_t, x2_t, x3_t, pw_len ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + 
w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = 
pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: forward the four word blocks and pw_len (masked to the range 0..63 above) to m24800s together with the kernel arguments + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base: return early for work-items at or past gid_max, then copy pws[gid].i[0..13] into w0, w1, w2 and w3[0..1]; w3[2] and w3[3] are set to zero + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: forward the four word blocks and pw_len (masked to the range 0..63 above) to m24800s together with the kernel arguments + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max); +} diff --git a/OpenCL/m24800_a3-pure.cl b/OpenCL/m24800_a3-pure.cl index bd9adf804..825b256ca 
100644 --- a/OpenCL/m24800_a3-pure.cl +++ b/OpenCL/m24800_a3-pure.cl @@ -32,15 +32,12 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_VECTOR ()) const u32 pw_len = pws[gid].pw_len; u32x w[64] = { 0 }; - u32x t[128] = { 0 }; for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) { w[idx] = pws[gid].i[idx]; } - /* The password is the salt too */ - /** * loop */ @@ -55,15 +52,18 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_VECTOR ()) w[0] = w0; - for(u32 i = 0, idx = 0; idx < pw_len; i += 2, idx += 1){ - make_utf16beN(&w[idx], &t[i], &t[i+1]); + u32x t[128] = { 0 }; + + for (u32 i = 0, idx = 0; idx < pw_len; i += 2, idx += 1) + { + make_utf16beN (&w[idx], &t[i + 0], &t[i + 1]); } sha1_hmac_ctx_vector_t ctx; - sha1_hmac_init_vector (&ctx, t, pw_len*2); + sha1_hmac_init_vector (&ctx, t, pw_len * 2); - sha1_hmac_update_vector (&ctx, t, pw_len*2); + sha1_hmac_update_vector (&ctx, t, pw_len * 2); sha1_hmac_final_vector (&ctx); @@ -106,7 +106,6 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_VECTOR ()) const u32 pw_len = pws[gid].pw_len; u32x w[64] = { 0 }; - u32x t[128] = { 0 }; for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) { @@ -126,15 +125,19 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_VECTOR ()) const u32x w0 = w0l | w0r; w[0] = w0; - for(u32 i = 0, idx = 0; idx < pw_len; i += 2, idx += 1){ - make_utf16beN(&w[idx], &t[i], &t[i+1]); + + u32x t[128] = { 0 }; + + for (u32 i = 0, idx = 0; idx < pw_len; i += 2, idx += 1) + { + make_utf16beN (&w[idx], &t[i + 0], &t[i + 1]); } sha1_hmac_ctx_vector_t ctx; - sha1_hmac_init_vector (&ctx, t, pw_len*2); + sha1_hmac_init_vector (&ctx, t, pw_len * 2); - sha1_hmac_update_vector (&ctx, t, pw_len*2); + sha1_hmac_update_vector (&ctx, t, pw_len * 2); sha1_hmac_final_vector (&ctx); diff --git a/docs/changes.txt b/docs/changes.txt index 51c05e86c..25b9b441e 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -15,6 +15,7 @@ - Added hash-mode: RSA/DSA/EC/OPENSSH Private Keys - Added hash-mode: SQLCipher - Added hash-mode: Dahua Authentication MD5 +- 
Added hash-mode: Umbraco HMAC-SHA1 - Added hash-mode: sha1(sha1($pass).$salt) ## diff --git a/docs/readme.txt b/docs/readme.txt index da7756e3d..c10adf117 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -325,6 +325,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - SMF (Simple Machines Forum) > v1.1 - MediaWiki B type - Redmine +- Umbraco HMAC-SHA1 - Joomla < 2.5.18 - OpenCart - PrestaShop diff --git a/src/modules/module_24800.c b/src/modules/module_24800.c index 1b46881fc..f7abd0acf 100644 --- a/src/modules/module_24800.c +++ b/src/modules/module_24800.c @@ -22,8 +22,6 @@ static const u64 KERN_TYPE = 24800; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_BE - | OPTS_TYPE_ST_ADD80 - | OPTS_TYPE_ST_ADDBITS15 | OPTS_TYPE_PT_UTF16LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; @@ -47,13 +45,15 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) { u32 *digest = (u32 *) digest_buf; + token_t token; token.token_cnt = 1; token.len_min[0] = 28; token.len_max[0] = 28; - token.attr[0] = TOKEN_ATTR_VERIFY_LENGTH; + token.attr[0] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); @@ -61,7 +61,8 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE const u8 *hash_pos = token.buf[0]; const int hash_len = token.len[0]; - u8 tmp_buf[20] = { 0 }; + + u8 tmp_buf[32] = { 0 }; const int decoded_len = base64_decode (base64_to_int, hash_pos, hash_len, tmp_buf); @@ -99,12 +100,13 @@ int module_hash_encode 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE tmp[3] = byte_swap_32 (tmp[3]); tmp[4] = byte_swap_32 (tmp[4]); - - char ptr_plain[28]; + u8 ptr_plain[100] = { 0 }; base64_encode (int_to_base64, (const u8 *) tmp, 20, (u8 *) ptr_plain); - return snprintf (line_buf, line_size, "%s", ptr_plain); + const int out_len = snprintf (line_buf, line_size, "%s", (char *) ptr_plain); + + return out_len; } void module_init (module_ctx_t *module_ctx) diff --git a/tools/test_modules/m24800.pm b/tools/test_modules/m24800.pm index 484b1fdc8..9d63a2c9c 100644 --- a/tools/test_modules/m24800.pm +++ b/tools/test_modules/m24800.pm @@ -10,21 +10,20 @@ use warnings; use Digest::SHA1 qw (sha1); use Digest::HMAC qw (hmac hmac_hex); -use Encode qw/encode decode/; +use Encode qw (encode decode); use MIME::Base64; -sub module_constraints { [[0, 256], [0, 256], [0, 55], [0, 55], [-1, -1]] } +sub module_constraints { [[0, 256], [0, 256], [0, 27], [0, 27], [0, 27]] } sub module_generate_hash { my $word = shift; - my $unicode_word; - $unicode_word = encode("UTF-16LE", $word); - + my $unicode_word = encode ("UTF-16LE", $word); + my $digest = hmac ($unicode_word, $unicode_word, \&sha1, 64); - - my $hash = sprintf ("%s", encode_base64($digest)); + + my $hash = sprintf ("%s", encode_base64 ($digest, "")); return $hash; }