From de137b96ee8bfee9b4f968b8b2fe8d4e62882316 Mon Sep 17 00:00:00 2001 From: TROUNCE Date: Sun, 25 Oct 2020 22:57:44 +0000 Subject: [PATCH 1/5] Add files via upload --- OpenCL/m03500_a0-optimized.cl | 824 +++++++++++++++++++++++ OpenCL/m03500_a0-pure.cl | 307 +++++++++ OpenCL/m03500_a1-optimized.cl | 940 ++++++++++++++++++++++++++ OpenCL/m03500_a1-pure.cl | 301 +++++++++ OpenCL/m03500_a3-optimized.cl | 1191 +++++++++++++++++++++++++++++++++ OpenCL/m03500_a3-pure.cl | 327 +++++++++ 6 files changed, 3890 insertions(+) create mode 100644 OpenCL/m03500_a0-optimized.cl create mode 100644 OpenCL/m03500_a0-pure.cl create mode 100644 OpenCL/m03500_a1-optimized.cl create mode 100644 OpenCL/m03500_a1-pure.cl create mode 100644 OpenCL/m03500_a3-optimized.cl create mode 100644 OpenCL/m03500_a3-pure.cl diff --git a/OpenCL/m03500_a0-optimized.cl b/OpenCL/m03500_a0-optimized.cl new file mode 100644 index 000000000..0fdbafd51 --- /dev/null +++ b/OpenCL/m03500_a0-optimized.cl @@ -0,0 +1,824 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP 3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m03500_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m03500_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m03500_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m03500_a0-pure.cl b/OpenCL/m03500_a0-pure.cl new file mode 100644 index 000000000..582bce969 --- /dev/null +++ b/OpenCL/m03500_a0-pure.cl @@ -0,0 +1,307 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + u32 a = ctx0.h[0]; + u32 b = ctx0.h[1]; + u32 c = ctx0.h[2]; + u32 d = ctx0.h[3]; + + md5_ctx_t ctx1; + + md5_init (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx1.len = 32; + + md5_final (&ctx1); + + a = ctx1.h[0]; + b = ctx1.h[1]; + c = ctx1.h[2]; + d = ctx1.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update (&ctx2, s, salt_len); + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m03500_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + u32 a = ctx0.h[0]; + u32 b = ctx0.h[1]; + u32 c = ctx0.h[2]; + u32 d = ctx0.h[3]; + + md5_ctx_t ctx1; + + md5_init (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx1.len = 32; + + md5_final (&ctx1); + + a = ctx1.h[0]; + b = ctx1.h[1]; + c = ctx1.h[2]; + d = ctx1.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update (&ctx2, s, salt_len); + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m03500_a1-optimized.cl b/OpenCL/m03500_a1-optimized.cl new file mode 100644 index 000000000..93bdfa5fd --- /dev/null +++ b/OpenCL/m03500_a1-optimized.cl @@ -0,0 +1,940 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m03500_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m03500_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m03500_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m03500_a1-pure.cl b/OpenCL/m03500_a1-pure.cl new file mode 100644 index 000000000..f73218057 --- /dev/null +++ b/OpenCL/m03500_a1-pure.cl @@ -0,0 +1,301 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + u32 a = ctx1.h[0]; + u32 b = ctx1.h[1]; + u32 c = ctx1.h[2]; + u32 d = ctx1.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update (&ctx2, s, salt_len); + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m03500_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + u32 a = ctx1.h[0]; + u32 b = ctx1.h[1]; + u32 c = ctx1.h[2]; + u32 d = ctx1.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update (&ctx2, s, salt_len); + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m03500_a3-optimized.cl b/OpenCL/m03500_a3-optimized.cl new file mode 100644 index 000000000..1034731cd --- /dev/null +++ b/OpenCL/m03500_a3-optimized.cl @@ -0,0 +1,1191 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +DECLSPEC void m03500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + //STEP 3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +DECLSPEC void m03500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + //STEP 3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + + } +} + +KERNEL_FQ void m03500_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, pws_pos, gid_max, l_bin2asc); +} diff --git a/OpenCL/m03500_a3-pure.cl b/OpenCL/m03500_a3-pure.cl new file mode 100644 index 000000000..a952f9854 --- /dev/null +++ b/OpenCL/m03500_a3-pure.cl @@ -0,0 +1,327 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + u32x a = ctx0.h[0]; + u32x b = ctx0.h[1]; + u32x c = ctx0.h[2]; + u32x d = ctx0.h[3]; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final_vector (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_vector_t ctx2; + + md5_init_vector (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update_vector (&ctx2, s, salt_len); + + md5_final_vector (&ctx2); + + const u32x r0 = ctx2.h[DGST_R0]; + const u32x r1 = ctx2.h[DGST_R1]; + const u32x r2 = ctx2.h[DGST_R2]; + const u32x r3 = ctx2.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m03500_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + u32x a = ctx0.h[0]; + u32x b = ctx0.h[1]; + u32x c = ctx0.h[2]; + u32x d = ctx0.h[3]; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final_vector (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_vector_t ctx2; + + md5_init_vector (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update_vector (&ctx2, s, salt_len); + + md5_final_vector (&ctx2); + + const u32x r0 = ctx2.h[DGST_R0]; + const u32x r1 = ctx2.h[DGST_R1]; + const u32x r2 = ctx2.h[DGST_R2]; + const u32x r3 = ctx2.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} From 8c0d467638d55fb83de05806a4b2d7432a30591d Mon Sep 17 00:00:00 2001 From: TROUNCE Date: Sun, 25 Oct 2020 22:58:18 +0000 Subject: [PATCH 2/5] Add files via upload --- src/modules/module_03500.c | 203 +++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 src/modules/module_03500.c diff --git a/src/modules/module_03500.c b/src/modules/module_03500.c new file mode 100644 index 000000000..10182be02 --- /dev/null +++ b/src/modules/module_03500.c @@ -0,0 +1,203 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 3; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 1; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_SALTED; +static const char *HASH_NAME = "md5(md5(md5($pass)))"; +static const u64 KERN_TYPE = 3500; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PRECOMPUTE_INIT + | OPTI_TYPE_EARLY_SKIP; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_PT_ADD80 + | OPTS_TYPE_PT_ADDBITS14 + | OPTS_TYPE_ST_ADD80; +static const u32 SALT_TYPE = SALT_TYPE_VIRTUAL; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "9882d0778518b095917eb589f6998441"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + token_t token; + + token.token_cnt = 1; + + token.len_min[0] = 32; + token.len_max[0] = 32; + token.attr[0] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + const u8 *hash_pos = token.buf[0]; + + digest[0] = hex_to_u32 (hash_pos + 0); + digest[1] = hex_to_u32 (hash_pos + 8); + digest[2] = hex_to_u32 (hash_pos + 16); + digest[3] = hex_to_u32 (hash_pos + 24); + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + digest[0] -= MD5M_A; + digest[1] -= MD5M_B; + digest[2] -= MD5M_C; + digest[3] -= MD5M_D; + } + + /** + * This is a virtual salt. While the algorithm is basically not salted + * we can exploit the salt buffer to set the 0x80 and the w[14] value. + * This way we can save a special md5md5 kernel and reuse the one from vbull. + */ + + static const u8 *zero = (const u8*) ""; + + const bool parse_rc = generic_salt_decode (hashconfig, zero, 0, (u8 *) salt->salt_buf, (int *) &salt->salt_len); + + if (parse_rc == false) return (PARSER_SALT_LENGTH); + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u32 *digest = (const u32 *) digest_buf; + + // we can not change anything in the original buffer, otherwise destroying sorting + // therefore create some local buffer + + u32 tmp[4]; + + tmp[0] = digest[0]; + tmp[1] = digest[1]; + tmp[2] = digest[2]; + tmp[3] = digest[3]; + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + tmp[0] += MD5M_A; + tmp[1] += MD5M_B; + tmp[2] += MD5M_C; + tmp[3] += MD5M_D; + } + + u8 *out_buf = (u8 *) line_buf; + + int out_len = 0; + + u32_to_hex (tmp[0], out_buf + out_len); out_len += 8; + u32_to_hex (tmp[1], out_buf + out_len); out_len += 8; + u32_to_hex (tmp[2], out_buf + out_len); out_len += 8; + u32_to_hex (tmp[3], out_buf + out_len); out_len += 8; + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = MODULE_DEFAULT; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = MODULE_DEFAULT; + module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = MODULE_DEFAULT; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} From ee569ea5409815c95acb585532370ec8e255595d Mon Sep 17 00:00:00 2001 From: TROUNCE Date: Sun, 25 Oct 2020 22:58:47 +0000 Subject: [PATCH 3/5] Add files via upload --- tools/test_modules/m03500.pm | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tools/test_modules/m03500.pm diff --git a/tools/test_modules/m03500.pm b/tools/test_modules/m03500.pm new file mode 100644 index 000000000..610a0990f --- /dev/null +++ b/tools/test_modules/m03500.pm @@ -0,0 +1,42 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +use strict; +use warnings; + +use Digest::MD5 qw (md5_hex); + +sub module_constraints { [[0, 256], [-1, -1], [0, 55], [-1, -1], [-1, -1]] } + +sub module_generate_hash +{ + my $word = shift; + + my $digest = md5_hex (md5_hex (md5_hex ($word))); + + my $hash = sprintf ("%s", $digest); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my ($hash, $word) = split (':', $line); + + return unless defined $hash; + return unless defined $word; + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed); + + return ($new_hash, $word); +} + +1; From afabc60ffafdabd0def87e4ae018648cf40fffa0 Mon Sep 17 00:00:00 2001 From: TROUNCE Date: Sun, 25 Oct 2020 22:59:28 +0000 Subject: [PATCH 4/5] Update changes.txt --- docs/changes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changes.txt b/docs/changes.txt index 387941f96..9b097c1af 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -14,6 +14,7 @@ - Added hash-mode: RAR3-p (Uncompressed) - Added hash-mode: RSA/DSA/EC/OPENSSH Private Keys - Added hash-mode: sha1(sha1($pass).$salt) +- Added hash-mode: md5(md5(md5($pass))) ## ## Bugs From 7b2fe7bbfcccdfd4030efb3862f65e2b183bd060 Mon Sep 17 00:00:00 2001 From: TROUNCE Date: Sun, 25 Oct 2020 23:00:03 +0000 Subject: [PATCH 5/5] Update readme.txt --- docs/readme.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/readme.txt b/docs/readme.txt index 21fec6f19..a73442ab0 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -79,6 +79,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - md5($salt.sha1($salt.$pass)) - md5($salt.utf16le($pass)) - md5(md5($pass)) +- md5(md5(md5($pass))) - md5(md5($pass).md5($salt)) - md5(sha1($pass)) - md5(sha1($pass).md5($pass).sha1($pass))