From 9bd77536c24c9ecf6b25c1dee115fe73f762e5fc Mon Sep 17 00:00:00 2001 From: philsmd Date: Sun, 26 Jul 2020 18:00:09 +0200 Subject: [PATCH] improved speed of -m 21200 by using pre-computed SHA1 hash --- OpenCL/m21200_a0-optimized.cl | 741 ++++++++------------------------- OpenCL/m21200_a0-pure.cl | 283 ++++++------- OpenCL/m21200_a1-optimized.cl | 741 ++++++++------------------------- OpenCL/m21200_a1-pure.cl | 291 ++++++------- OpenCL/m21200_a3-optimized.cl | 751 ++++++++-------------------------- OpenCL/m21200_a3-pure.cl | 343 +++++++--------- docs/changes.txt | 1 + src/modules/module_21200.c | 32 ++ tools/test_modules/m21200.pm | 2 +- 9 files changed, 924 insertions(+), 2261 deletions(-) diff --git a/OpenCL/m21200_a0-optimized.cl b/OpenCL/m21200_a0-optimized.cl index fe2cb311b..baad1072c 100644 --- a/OpenCL/m21200_a0-optimized.cl +++ b/OpenCL/m21200_a0-optimized.cl @@ -14,7 +14,6 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" -#include "inc_hash_sha1.cl" #endif #if VECT_SIZE == 1 @@ -82,154 +81,25 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len); - - /** - * sha1(salt) - */ - - u32x w0_t = hc_swap32 (salt_buf0[0]); - u32x w1_t = hc_swap32 (salt_buf0[1]); - u32x w2_t = hc_swap32 (salt_buf0[2]); - u32x w3_t = hc_swap32 (salt_buf0[3]); - u32x w4_t = hc_swap32 (salt_buf1[0]); - u32x w5_t = 0; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, 
c, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ 
wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - const u32x a0 = a; - const u32x b0 = b; - const u32x c0 = c; - const u32x d0 = d; - const u32x e0 = e; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; /** * loop @@ -250,27 +120,27 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ()) * md5 */ - w0_t = w0[0]; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = out_len * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; 
+ u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = out_len * 8; + u32x wf_t = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -356,84 +226,31 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ()) * md5 */ - w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - - // ctx len 40, pos 40 - - w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x _w0[4] = { 0 }; - u32x _w1[4] = { 0 }; - u32x _w2[4] = { 0 }; - u32x _w3[4] = { 0 }; - - switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40); - - w0_t |= w0[0]; - w1_t |= w0[1]; - w2_t |= w0[2]; - w3_t |= w0[3]; - w4_t |= w1[0]; - w5_t |= w1[1]; - w6_t |= w1[2]; - w7_t |= w1[3]; - w8_t |= w2[0]; - w9_t |= w2[1]; - wa_t |= w2[2]; - wb_t |= w2[3]; - wc_t |= w3[0]; - wd_t |= w3[1]; - we_t |= w3[2]; - wf_t |= w3[3]; + // combine sha1 ($salt) . 
md5 ($pass) + + w0_t = salt_buf0[0]; + w1_t = salt_buf0[1]; + w2_t = salt_buf0[2]; + w3_t = salt_buf0[3]; + w4_t = salt_buf1[0]; + w5_t = salt_buf1[1]; + w6_t = salt_buf1[2]; + w7_t = salt_buf1[3]; + w8_t = salt_buf2[0]; + w9_t = salt_buf2[1]; + + wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; + wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; + wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; + wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; + we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; + wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; // md5 transform @@ -522,30 +339,6 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ()) digest[2] += c; digest[3] += d; - w0[0] = _w0[0]; - w0[1] = _w0[1]; - w0[2] = _w0[2]; - w0[3] = _w0[3]; - w1[0] = _w1[0]; - w1[1] = _w1[1]; - w1[2] = _w1[2]; - w1[3] = _w1[3]; - w2[0] = _w2[0]; - w2[1] = _w2[1]; - w2[2] = _w2[2]; - w2[3] = _w2[3]; - w3[0] = _w3[0]; - w3[1] = _w3[1]; - w3[2] = _w3[2]; - w3[3] = _w3[3]; - - // ctx len 72, pos 8 - - append_0x80_4x4 (w0, w1, w2, w3, 8); - - w3[2] = 72 * 8; - w3[3] = 0; - // md5 final transform a = digest[0]; @@ -553,22 +346,24 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ()) c = digest[2]; d = digest[3]; - w0_t = w0[0]; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = w3[2]; - wf_t = w3[3]; + w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; + w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; + w2_t = 0x00000080; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 72 * 8; + wf_t = 0; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -708,154 +503,25 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len); - - /** - * sha1(salt) - */ - - u32x w0_t = hc_swap32 (salt_buf0[0]); - u32x w1_t = hc_swap32 (salt_buf0[1]); - u32x w2_t = hc_swap32 (salt_buf0[2]); - u32x w3_t = hc_swap32 (salt_buf0[3]); - u32x w4_t = hc_swap32 (salt_buf1[0]); - u32x w5_t = 0; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, 
w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, 
we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - const u32x a0 = a; - const u32x b0 = b; - const u32x c0 = c; - const u32x d0 = d; - const u32x e0 = e; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; /** * digest @@ -888,27 +554,27 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ()) * md5 */ - w0_t = w0[0]; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = out_len * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = out_len * 8; + u32x wf_t = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -994,84 +660,31 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ()) * md5 */ - w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - - // ctx len 40, pos 40 - - w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - 
w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x _w0[4] = { 0 }; - u32x _w1[4] = { 0 }; - u32x _w2[4] = { 0 }; - u32x _w3[4] = { 0 }; - - switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40); - - w0_t |= w0[0]; - w1_t |= w0[1]; - w2_t |= w0[2]; - w3_t |= w0[3]; - w4_t |= w1[0]; - w5_t |= w1[1]; - w6_t |= w1[2]; - w7_t |= w1[3]; - w8_t |= w2[0]; - w9_t |= w2[1]; - wa_t |= w2[2]; - wb_t |= w2[3]; - wc_t |= w3[0]; - wd_t |= w3[1]; - we_t |= w3[2]; - wf_t |= w3[3]; + // combine sha1 ($salt) . md5 ($pass) + + w0_t = salt_buf0[0]; + w1_t = salt_buf0[1]; + w2_t = salt_buf0[2]; + w3_t = salt_buf0[3]; + w4_t = salt_buf1[0]; + w5_t = salt_buf1[1]; + w6_t = salt_buf1[2]; + w7_t = salt_buf1[3]; + w8_t = salt_buf2[0]; + w9_t = salt_buf2[1]; + + wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; + wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; + wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; + wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; + we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; + wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; // md5 transform @@ -1160,30 +773,6 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ()) digest[2] += c; digest[3] += d; - w0[0] = _w0[0]; - w0[1] = _w0[1]; - w0[2] = _w0[2]; - w0[3] = _w0[3]; - w1[0] = _w1[0]; - w1[1] = _w1[1]; - w1[2] = _w1[2]; - w1[3] = _w1[3]; - w2[0] = _w2[0]; - w2[1] = _w2[1]; - w2[2] = _w2[2]; - w2[3] = _w2[3]; - w3[0] = _w3[0]; - w3[1] = _w3[1]; - w3[2] = _w3[2]; - w3[3] = _w3[3]; - - // ctx len 72, pos 8 - - append_0x80_4x4 (w0, w1, w2, w3, 8); - - w3[2] = 72 * 8; - w3[3] = 0; - // md5 final transform a = digest[0]; @@ -1191,22 +780,24 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ()) c = digest[2]; d = digest[3]; - w0_t = w0[0]; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = w3[2]; - wf_t = w3[3]; + w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; + w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; + w2_t = 0x00000080; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 72 * 8; + wf_t = 0; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); diff --git a/OpenCL/m21200_a0-pure.cl b/OpenCL/m21200_a0-pure.cl index c06536b58..e0a801fc8 100644 --- a/OpenCL/m21200_a0-pure.cl +++ b/OpenCL/m21200_a0-pure.cl @@ -14,7 +14,6 @@ #include "inc_rp.cl" #include "inc_scalar.cl" #include "inc_hash_md5.cl" -#include "inc_hash_sha1.cl" #endif #if VECT_SIZE == 1 @@ -64,38 +63,42 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[64] = { 0 }; - - for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) - { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; - } - - sha1_ctx_t ctx0; - - sha1_init (&ctx0); - - sha1_update_swap (&ctx0, s, salt_len); - - sha1_final (&ctx0); + /** + * salt + */ - const u32 a0 = ctx0.h[0]; - const u32 b0 = ctx0.h[1]; - const u32 c0 = 
ctx0.h[2]; - const u32 d0 = ctx0.h[3]; - const u32 e0 = ctx0.h[4]; + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40); /** * loop */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { pw_t tmp = PASTE_PW; @@ -110,61 +113,34 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ()) md5_final (&ctx1); - const u32 a1 = hc_swap32 (ctx1.h[0]); - const u32 b1 = hc_swap32 (ctx1.h[1]); - const u32 c1 = hc_swap32 (ctx1.h[2]); - const u32 d1 = hc_swap32 (ctx1.h[3]); - - md5_ctx_t ctx; - - md5_init (&ctx); - - w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16; - - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - md5_update_64 (&ctx, w0, w1, w2, w3, 40); - - w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; + const u32 a = hc_swap32 (ctx1.h[0]); + const u32 b = hc_swap32 (ctx1.h[1]); + const u32 c = hc_swap32 (ctx1.h[2]); + const u32 d = hc_swap32 (ctx1.h[3]); + + // add md5_hex ($pass) to ctx0: + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 
uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; w2[0] = 0; w2[1] = 0; @@ -175,6 +151,8 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ()) w3[2] = 0; w3[3] = 0; + md5_ctx_t ctx = ctx0; + md5_update_64 (&ctx, w0, w1, w2, w3, 32); md5_final (&ctx); @@ -235,38 +213,42 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[64] = { 0 }; - - for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) - { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; - } - - sha1_ctx_t ctx0; - - sha1_init (&ctx0); - - sha1_update_swap (&ctx0, s, salt_len); - - sha1_final (&ctx0); + /** + * salt + */ - const u32 a0 = ctx0.h[0]; - const u32 b0 = ctx0.h[1]; - const u32 c0 = ctx0.h[2]; - const u32 d0 = ctx0.h[3]; - const u32 e0 = ctx0.h[4]; + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40); /** * loop */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) { pw_t tmp = PASTE_PW; @@ -281,61 +263,34 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ()) md5_final (&ctx1); - const u32 a1 = hc_swap32 (ctx1.h[0]); - const u32 b1 = hc_swap32 (ctx1.h[1]); - const u32 c1 = hc_swap32 (ctx1.h[2]); - const u32 d1 = hc_swap32 (ctx1.h[3]); - - md5_ctx_t ctx; - - md5_init (&ctx); - - w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((d0 >> 
8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16; - - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - md5_update_64 (&ctx, w0, w1, w2, w3, 40); - - w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; + const u32 a = hc_swap32 (ctx1.h[0]); + const u32 b = hc_swap32 (ctx1.h[1]); + const u32 c = hc_swap32 (ctx1.h[2]); + const u32 d = hc_swap32 (ctx1.h[3]); + + // add md5_hex ($pass) to ctx0: + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; w2[0] = 0; w2[1] = 0; @@ -346,6 +301,8 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ()) w3[2] = 0; w3[3] = 0; + md5_ctx_t ctx = ctx0; + md5_update_64 (&ctx, w0, w1, w2, w3, 32); md5_final (&ctx); diff --git a/OpenCL/m21200_a1-optimized.cl b/OpenCL/m21200_a1-optimized.cl index 4f835c979..18ab817e9 100644 --- a/OpenCL/m21200_a1-optimized.cl +++ b/OpenCL/m21200_a1-optimized.cl @@ -12,7 +12,6 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" -#include "inc_hash_sha1.cl" #endif #if VECT_SIZE == 1 @@ -80,154 +79,25 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len); - - /** - * sha1(salt) - */ - - u32x w0_t = hc_swap32 (salt_buf0[0]); - u32x w1_t = hc_swap32 (salt_buf0[1]); - u32x w2_t = hc_swap32 (salt_buf0[2]); - u32x w3_t = hc_swap32 (salt_buf0[3]); - u32x w4_t = hc_swap32 (salt_buf1[0]); - u32x w5_t = 0; - u32x 
w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 
1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, 
wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - const u32x a0 = a; - const u32x b0 = b; - const u32x c0 = c; - const u32x d0 = d; - const u32x e0 = e; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; /** * loop @@ -306,27 +176,27 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) * md5 */ - w0_t = w0[0]; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = pw_len * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = pw_len * 8; + u32x wf_t = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -412,84 +282,31 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) * md5 */ - w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - - // ctx len 40, pos 40 - - w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 - | 
uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x _w0[4] = { 0 }; - u32x _w1[4] = { 0 }; - u32x _w2[4] = { 0 }; - u32x _w3[4] = { 0 }; - - switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40); - - w0_t |= w0[0]; - w1_t |= w0[1]; - w2_t |= w0[2]; - w3_t |= w0[3]; - w4_t |= w1[0]; - w5_t |= w1[1]; - w6_t |= w1[2]; - w7_t |= w1[3]; - w8_t |= w2[0]; - w9_t |= w2[1]; - wa_t |= w2[2]; - wb_t |= w2[3]; - wc_t |= w3[0]; - wd_t |= w3[1]; - we_t |= w3[2]; - wf_t |= w3[3]; + // combine sha1 ($salt) . md5 ($pass) + + w0_t = salt_buf0[0]; + w1_t = salt_buf0[1]; + w2_t = salt_buf0[2]; + w3_t = salt_buf0[3]; + w4_t = salt_buf1[0]; + w5_t = salt_buf1[1]; + w6_t = salt_buf1[2]; + w7_t = salt_buf1[3]; + w8_t = salt_buf2[0]; + w9_t = salt_buf2[1]; + + wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; + wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; + wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; + wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; + we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; + wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; // md5 transform @@ -578,30 +395,6 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) digest[2] += c; digest[3] += d; - w0[0] = _w0[0]; - w0[1] = _w0[1]; - w0[2] = _w0[2]; - w0[3] = _w0[3]; - w1[0] = _w1[0]; - w1[1] = _w1[1]; - w1[2] = _w1[2]; - w1[3] = _w1[3]; - w2[0] = _w2[0]; - w2[1] = _w2[1]; - w2[2] = _w2[2]; - w2[3] = _w2[3]; - w3[0] = _w3[0]; - w3[1] = _w3[1]; - w3[2] = _w3[2]; - w3[3] = _w3[3]; - - // ctx len 72, pos 8 - - append_0x80_4x4 (w0, w1, w2, w3, 8); - - w3[2] = 72 * 8; - w3[3] = 0; - // md5 final transform a = digest[0]; @@ -609,22 +402,24 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) c = digest[2]; d = digest[3]; - w0_t = w0[0]; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = w3[2]; - wf_t = w3[3]; + w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; + w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; + w2_t = 0x00000080; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 72 * 8; + wf_t = 0; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -764,154 +559,25 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = 
salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len); - - /** - * sha1(salt) - */ - - u32x w0_t = hc_swap32 (salt_buf0[0]); - u32x w1_t = hc_swap32 (salt_buf0[1]); - u32x w2_t = hc_swap32 (salt_buf0[2]); - u32x w3_t = hc_swap32 (salt_buf0[3]); - u32x w4_t = hc_swap32 (salt_buf1[0]); - u32x w5_t = 0; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); 
SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - const u32x a0 = a; - const u32x b0 = b; - const u32x c0 = c; - const u32x d0 = d; - const u32x e0 = e; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; /** * digest @@ -1002,27 +668,27 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) * md5 */ - w0_t = w0[0]; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = pw_len * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = pw_len * 8; + u32x wf_t = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -1108,84 +774,31 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) * md5 */ - w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 
- | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
- wa_t = 0;
- wb_t = 0;
- wc_t = 0;
- wd_t = 0;
- we_t = 0;
- wf_t = 0;
-
- // ctx len 40, pos 40
-
- w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
- w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
- w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
- w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
- w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
- w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
- w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
- w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
-
- w2[0] = 0;
- w2[1] = 0;
- w2[2] = 0;
- w2[3] = 0;
- w3[0] = 0;
- w3[1] = 0;
- w3[2] = 0;
- w3[3] = 0;
-
- u32x _w0[4] = { 0 };
- u32x _w1[4] = { 0 };
- u32x _w2[4] = { 0 };
- u32x _w3[4] = { 0 };
-
- switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40);
-
- w0_t |= w0[0];
- w1_t |= w0[1];
- w2_t |= w0[2];
- w3_t |= w0[3];
- w4_t |= w1[0];
- w5_t |= w1[1];
- w6_t |= w1[2];
- w7_t |= w1[3];
- w8_t |= w2[0];
- w9_t |= w2[1];
- wa_t |= w2[2];
- wb_t |= w2[3];
- wc_t |= w3[0];
- wd_t |= w3[1];
- we_t |= w3[2];
- wf_t |= w3[3];
+ // combine sha1 ($salt) . md5 ($pass)
+
+ w0_t = salt_buf0[0];
+ w1_t = salt_buf0[1];
+ w2_t = salt_buf0[2];
+ w3_t = salt_buf0[3];
+ w4_t = salt_buf1[0];
+ w5_t = salt_buf1[1];
+ w6_t = salt_buf1[2];
+ w7_t = salt_buf1[3];
+ w8_t = salt_buf2[0];
+ w9_t = salt_buf2[1];
+
+ wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
+ wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
+ wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
+ wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
+ we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
+ wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
 // md5 transform
@@ -1274,30 +887,6 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ())
 digest[2] += c;
 digest[3] += d;
- w0[0] = _w0[0];
- w0[1] = _w0[1];
- w0[2] = _w0[2];
- w0[3] = _w0[3];
- w1[0] = _w1[0];
- w1[1] = _w1[1];
- w1[2] = _w1[2];
- w1[3] = _w1[3];
- w2[0] = _w2[0];
- w2[1] = _w2[1];
- w2[2] = _w2[2];
- w2[3] = _w2[3];
- w3[0] = _w3[0];
- w3[1] = _w3[1];
- w3[2] = _w3[2];
- w3[3] = _w3[3];
-
- // ctx len 72, pos 8
-
- append_0x80_4x4 (w0, w1, w2, w3, 8);
-
- w3[2] = 72 * 8;
- w3[3] = 0;
-
 // md5 final transform
 a = digest[0];
@@ -1305,22 +894,24 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ())
 c = digest[2];
 d = digest[3];
- w0_t = w0[0];
- w1_t = w0[1];
- w2_t = w0[2];
- w3_t = w0[3];
- w4_t = w1[0];
- w5_t = w1[1];
- w6_t = w1[2];
- w7_t = w1[3];
- w8_t = w2[0];
- w9_t = w2[1];
- wa_t = w2[2];
- wb_t = w2[3];
- wc_t = w3[0];
- wd_t = w3[1];
- we_t = w3[2];
- wf_t = w3[3];
+ w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
+ w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
+ w2_t = 0x00000080;
+ w3_t = 0;
+ w4_t = 0;
+ w5_t = 0;
+ w6_t = 0;
+ w7_t = 0;
+ w8_t = 0;
+ w9_t = 0;
+ wa_t = 0;
+ wb_t = 0;
+ wc_t = 0;
+ wd_t = 0;
+ we_t = 72 * 8;
+ wf_t = 0;
 MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
 MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
diff --git a/OpenCL/m21200_a1-pure.cl b/OpenCL/m21200_a1-pure.cl
index 4442de477..2e5dd6315 100644
--- a/OpenCL/m21200_a1-pure.cl
+++ b/OpenCL/m21200_a1-pure.cl
@@ -12,7 +12,6 @@
 #include "inc_common.cl"
 #include "inc_scalar.cl"
 #include "inc_hash_md5.cl"
-#include "inc_hash_sha1.cl"
 #endif
 #if VECT_SIZE == 1
@@ -60,29 +59,6 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
 * base
 */
- const u32 salt_len = salt_bufs[salt_pos].salt_len;
-
- u32 s[64] = { 0 };
-
- for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
- {
- s[idx] = salt_bufs[salt_pos].salt_buf[idx];
- }
-
- sha1_ctx_t ctx0;
-
- sha1_init (&ctx0);
-
- sha1_update_swap (&ctx0, s, salt_len);
-
- sha1_final (&ctx0);
-
- const u32 a0 = ctx0.h[0];
- const u32 b0 = ctx0.h[1];
- const u32 c0 = ctx0.h[2];
- const u32 d0 = ctx0.h[3];
- const u32 e0 = ctx0.h[4];
-
 md5_ctx_t ctx11;
 md5_init (&ctx11);
@@ -90,13 +66,40 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
 md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
 /**
- * loop
+ * salt
 */
- u32 w0[4];
- u32 w1[4];
- u32 w2[4];
- u32 w3[4];
+ u32 salt_buf0[4];
+ u32 salt_buf1[4];
+ u32 salt_buf2[4];
+ u32 salt_buf3[4];
+
+ salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+ salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+ salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
+ salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
+ salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
+ salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
+ salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
+ salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
+ salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
+ salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
+ salt_buf2[2] = 0;
+ salt_buf2[3] = 0;
+ salt_buf3[0] = 0;
+ salt_buf3[1] = 0;
+ salt_buf3[2] = 0;
+ salt_buf3[3] = 0;
+
+ md5_ctx_t ctx0;
+
+ md5_init (&ctx0);
+
+ md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
+
+ /**
+ * loop
+ */
 for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
 {
@@ -106,61 +109,34 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
 md5_final (&ctx1);
- const u32 a1 = hc_swap32 (ctx1.h[0]);
- const u32 b1 = hc_swap32 (ctx1.h[1]);
- const u32 c1 = hc_swap32 (ctx1.h[2]);
- const u32 d1 = hc_swap32 (ctx1.h[3]);
-
- md5_ctx_t ctx;
-
- md5_init (&ctx);
-
- w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
- w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
- w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
- w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
- w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
- w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
- w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
- w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
- w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
- w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
-
- w2[2] = 0;
- w2[3] = 0;
- w3[0] = 0;
- w3[1] = 0;
- w3[2] = 0;
- w3[3] = 0;
-
- md5_update_64 (&ctx, w0, w1, w2, w3, 40);
-
- w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
- w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
- w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
- w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
- w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
- w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
- w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
- w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
+ const u32 a = hc_swap32 (ctx1.h[0]);
+ const u32 b = hc_swap32 (ctx1.h[1]);
+ const u32 c = hc_swap32 (ctx1.h[2]);
+ const u32 d = hc_swap32 (ctx1.h[3]);
+
+ // add md5_hex ($pass) to ctx0:
+
+ u32 w0[4];
+ u32 w1[4];
+ u32 w2[4];
+ u32 w3[4];
+
+ w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
+ w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((a >> 0) & 255) << 16;
+ w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
+ w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((b >> 0) & 255) << 16;
+ w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
+ w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((c >> 0) & 255) << 16;
+ w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
+ w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((d >> 0) & 255) << 16;
 w2[0] = 0;
 w2[1] = 0;
@@ -171,6 +147,8 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
 w3[2] = 0;
 w3[3] = 0;
+ md5_ctx_t ctx = ctx0;
+
 md5_update_64 (&ctx, w0, w1, w2, w3, 32);
 md5_final (&ctx);
@@ -229,29 +207,6 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
 * base
 */
- const u32 salt_len = salt_bufs[salt_pos].salt_len;
-
- u32 s[64] = { 0 };
-
- for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
- {
- s[idx] = salt_bufs[salt_pos].salt_buf[idx];
- }
-
- sha1_ctx_t ctx0;
-
- sha1_init (&ctx0);
-
- sha1_update_swap (&ctx0, s, salt_len);
-
- sha1_final (&ctx0);
-
- const u32 a0 = ctx0.h[0];
- const u32 b0 = ctx0.h[1];
- const u32 c0 = ctx0.h[2];
- const u32 d0 = ctx0.h[3];
- const u32 e0 = ctx0.h[4];
-
 md5_ctx_t ctx11;
 md5_init (&ctx11);
@@ -259,13 +214,40 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
 md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
 /**
- * loop
+ * salt
 */
- u32 w0[4];
- u32 w1[4];
- u32 w2[4];
- u32 w3[4];
+ u32 salt_buf0[4];
+ u32 salt_buf1[4];
+ u32 salt_buf2[4];
+ u32 salt_buf3[4];
+
+ salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+ salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+ salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
+ salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
+ salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
+ salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
+ salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
+ salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
+ salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
+ salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
+ salt_buf2[2] = 0;
+ salt_buf2[3] = 0;
+ salt_buf3[0] = 0;
+ salt_buf3[1] = 0;
+ salt_buf3[2] = 0;
+ salt_buf3[3] = 0;
+
+ md5_ctx_t ctx0;
+
+ md5_init (&ctx0);
+
+ md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
+
+ /**
+ * loop
+ */
 for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
 {
@@ -275,61 +257,34 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
 md5_final (&ctx1);
- const u32 a1 = hc_swap32 (ctx1.h[0]);
- const u32 b1 = hc_swap32 (ctx1.h[1]);
- const u32 c1 = hc_swap32 (ctx1.h[2]);
- const u32 d1 = hc_swap32 (ctx1.h[3]);
-
- md5_ctx_t ctx;
-
- md5_init (&ctx);
-
- w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
- w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
- w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
- w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
- w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
- w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
- w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
- w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
- w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
- w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
-
- w2[2] = 0;
- w2[3] = 0;
- w3[0] = 0;
- w3[1] = 0;
- w3[2] = 0;
- w3[3] = 0;
-
- md5_update_64 (&ctx, w0, w1, w2, w3, 40);
-
- w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
- w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
- w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
- w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
- w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
- w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
- w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
- | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
- w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
- | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
+ const u32 a = hc_swap32 (ctx1.h[0]);
+ const u32 b = hc_swap32 (ctx1.h[1]);
+ const u32 c = hc_swap32 (ctx1.h[2]);
+ const u32 d = hc_swap32 (ctx1.h[3]);
+
+ // add md5_hex ($pass) to ctx0:
+
+ u32 w0[4];
+ u32 w1[4];
+ u32 w2[4];
+ u32 w3[4];
+
+ w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
+ w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((a >> 0) & 255) << 16;
+ w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
+ w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((b >> 0) & 255) << 16;
+ w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
+ w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((c >> 0) & 255) << 16;
+ w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
+ | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
+ w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
+ | uint_to_hex_lower8 ((d >> 0) & 255) << 16;
 w2[0] = 0;
w2[1] = 0; @@ -340,6 +295,8 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ()) w3[2] = 0; w3[3] = 0; + md5_ctx_t ctx = ctx0; + md5_update_64 (&ctx, w0, w1, w2, w3, 32); md5_final (&ctx); diff --git a/OpenCL/m21200_a3-optimized.cl b/OpenCL/m21200_a3-optimized.cl index dc5960d4f..8204d1d42 100644 --- a/OpenCL/m21200_a3-optimized.cl +++ b/OpenCL/m21200_a3-optimized.cl @@ -12,7 +12,6 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" -#include "inc_hash_sha1.cl" #endif #if VECT_SIZE == 1 @@ -42,154 +41,25 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[4]; u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len); - - /** - * sha1(salt) - */ - - u32x w0_t = hc_swap32 (salt_buf0[0]); - u32x w1_t = hc_swap32 (salt_buf0[1]); - u32x w2_t = hc_swap32 (salt_buf0[2]); - u32x w3_t = hc_swap32 (salt_buf0[3]); - u32x w4_t = hc_swap32 (salt_buf1[0]); - u32x w5_t = 0; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, 
wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - 
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - const u32x a0 = a; - const u32x b0 = b; - const u32x c0 = c; - const u32x d0 = d; - const u32x e0 = e; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; /** * loop @@ -207,27 +77,27 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * md5 */ - w0_t = w0lr; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = pw_len * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = pw_len * 8; + u32x wf_t = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -313,89 +183,31 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * md5 */ - w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | 
uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - - // ctx len 40, pos 40 - - u32x _w0[4] = { 0 }; - u32x _w1[4] = { 0 }; - u32x _w2[4] = { 0 }; - u32x _w3[4] = { 0 }; - - _w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; - _w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; - _w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; - _w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; - _w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; - _w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; - _w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; - _w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; - - _w2[0] = 0; - _w2[1] = 0; - _w2[2] = 0; - _w2[3] = 0; - _w3[0] = 0; - _w3[1] = 0; - _w3[2] = 0; - _w3[3] = 0; - - u32x _c0[4] = { 0 }; - u32x _c1[4] = { 0 }; - u32x _c2[4] = { 0 }; - u32x _c3[4] = { 0 }; - - switch_buffer_by_offset_carry_le (_w0, _w1, _w2, _w3, _c0, _c1, _c2, _c3, 40); - - w0_t |= _w0[0]; - w1_t |= _w0[1]; - w2_t |= _w0[2]; - w3_t |= _w0[3]; - w4_t |= _w1[0]; - w5_t |= _w1[1]; - w6_t |= _w1[2]; - w7_t |= _w1[3]; - w8_t |= _w2[0]; - w9_t |= _w2[1]; - wa_t |= _w2[2]; - wb_t |= _w2[3]; - wc_t |= _w3[0]; - wd_t |= _w3[1]; - we_t |= _w3[2]; - wf_t |= _w3[3]; + // combine sha1 ($salt) . 
md5 ($pass) + + w0_t = salt_buf0[0]; + w1_t = salt_buf0[1]; + w2_t = salt_buf0[2]; + w3_t = salt_buf0[3]; + w4_t = salt_buf1[0]; + w5_t = salt_buf1[1]; + w6_t = salt_buf1[2]; + w7_t = salt_buf1[3]; + w8_t = salt_buf2[0]; + w9_t = salt_buf2[1]; + + wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; + wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; + wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; + wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; + we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; + wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; // md5 transform @@ -484,30 +296,6 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER digest[2] += c; digest[3] += d; - _w0[0] = _c0[0]; - _w0[1] = _c0[1]; - _w0[2] = _c0[2]; - _w0[3] = _c0[3]; - _w1[0] = _c1[0]; - _w1[1] = _c1[1]; - _w1[2] = _c1[2]; - _w1[3] = _c1[3]; - _w2[0] = _c2[0]; - _w2[1] = _c2[1]; - _w2[2] = _c2[2]; - _w2[3] = _c2[3]; - _w3[0] = _c3[0]; - _w3[1] = _c3[1]; - _w3[2] = _c3[2]; - _w3[3] = _c3[3]; - - // ctx len 72, pos 8 - - append_0x80_4x4 (_w0, _w1, _w2, _w3, 8); - - _w3[2] = 72 * 8; - _w3[3] = 0; - // md5 final transform a = digest[0]; @@ -515,22 +303,24 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER c = digest[2]; d = digest[3]; - w0_t = _w0[0]; - w1_t = _w0[1]; - w2_t = _w0[2]; - w3_t = _w0[3]; - w4_t = _w1[0]; - w5_t = _w1[1]; - w6_t = _w1[2]; - w7_t = _w1[3]; - w8_t = _w2[0]; - w9_t = _w2[1]; - wa_t = _w2[2]; - wb_t = _w2[3]; - wc_t = _w3[0]; - wd_t = _w3[1]; - we_t = _w3[2]; - wf_t = _w3[3]; + w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; + w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; + w2_t = 0x00000080; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 72 * 8; + wf_t = 0; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -624,154 +414,25 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[4]; u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len); - - /** - * sha1(salt) - */ - - u32x w0_t = hc_swap32 (salt_buf0[0]); - u32x w1_t = hc_swap32 (salt_buf0[1]); - u32x w2_t = hc_swap32 (salt_buf0[2]); - u32x w3_t = hc_swap32 (salt_buf0[3]); - u32x w4_t = hc_swap32 (salt_buf1[0]); - u32x w5_t = 0; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, 
w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); 
SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - const u32x a0 = a; - const u32x b0 = b; - const u32x c0 = c; - const u32x d0 = d; - const u32x e0 = e; + u32 
salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; /** * digest @@ -801,27 +462,27 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * md5 */ - w0_t = w0lr; - w1_t = w0[1]; - w2_t = w0[2]; - w3_t = w0[3]; - w4_t = w1[0]; - w5_t = w1[1]; - w6_t = w1[2]; - w7_t = w1[3]; - w8_t = w2[0]; - w9_t = w2[1]; - wa_t = w2[2]; - wb_t = w2[3]; - wc_t = w3[0]; - wd_t = w3[1]; - we_t = pw_len * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = pw_len * 8; + u32x wf_t = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -907,89 +568,31 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * md5 */ - w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - - // ctx len 40, pos 40 - - u32x _w0[4] = { 0 }; - u32x _w1[4] = { 0 }; - u32x _w2[4] = { 0 }; - u32x _w3[4] = { 0 }; - - _w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; - _w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; - _w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; - _w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; - _w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; - _w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c1 >> 0) & 
255) << 16; - _w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; - _w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; - - _w2[0] = 0; - _w2[1] = 0; - _w2[2] = 0; - _w2[3] = 0; - _w3[0] = 0; - _w3[1] = 0; - _w3[2] = 0; - _w3[3] = 0; - - u32x _c0[4] = { 0 }; - u32x _c1[4] = { 0 }; - u32x _c2[4] = { 0 }; - u32x _c3[4] = { 0 }; - - switch_buffer_by_offset_carry_le (_w0, _w1, _w2, _w3, _c0, _c1, _c2, _c3, 40); - - w0_t |= _w0[0]; - w1_t |= _w0[1]; - w2_t |= _w0[2]; - w3_t |= _w0[3]; - w4_t |= _w1[0]; - w5_t |= _w1[1]; - w6_t |= _w1[2]; - w7_t |= _w1[3]; - w8_t |= _w2[0]; - w9_t |= _w2[1]; - wa_t |= _w2[2]; - wb_t |= _w2[3]; - wc_t |= _w3[0]; - wd_t |= _w3[1]; - we_t |= _w3[2]; - wf_t |= _w3[3]; + // combine sha1 ($salt) . md5 ($pass) + + w0_t = salt_buf0[0]; + w1_t = salt_buf0[1]; + w2_t = salt_buf0[2]; + w3_t = salt_buf0[3]; + w4_t = salt_buf1[0]; + w5_t = salt_buf1[1]; + w6_t = salt_buf1[2]; + w7_t = salt_buf1[3]; + w8_t = salt_buf2[0]; + w9_t = salt_buf2[1]; + + wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16; + wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16; + wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16; + wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16; + we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16; + wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16; // md5 transform @@ -1078,30 +681,6 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER digest[2] += c; digest[3] += d; - _w0[0] = _c0[0]; - _w0[1] = _c0[1]; - _w0[2] = _c0[2]; - _w0[3] = _c0[3]; - _w1[0] = _c1[0]; - _w1[1] = _c1[1]; - _w1[2] = _c1[2]; - _w1[3] = _c1[3]; - _w2[0] = _c2[0]; - _w2[1] = _c2[1]; - _w2[2] = _c2[2]; - _w2[3] = _c2[3]; - _w3[0] = _c3[0]; - _w3[1] = _c3[1]; - _w3[2] = _c3[2]; - _w3[3] = _c3[3]; - - // ctx len 72, pos 8 - - append_0x80_4x4 (_w0, _w1, _w2, _w3, 8); - - _w3[2] = 72 * 8; - _w3[3] = 0; - // md5 final transform a = digest[0]; @@ -1109,22 +688,24 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER c = digest[2]; d = digest[3]; - w0_t = _w0[0]; - w1_t = _w0[1]; - w2_t = _w0[2]; - w3_t = _w0[3]; - w4_t = _w1[0]; - w5_t = _w1[1]; - w6_t = _w1[2]; - w7_t = _w1[3]; - w8_t = _w2[0]; - w9_t = _w2[1]; - wa_t = _w2[2]; - wb_t = _w2[3]; - wc_t = _w3[0]; - wd_t = _w3[1]; - we_t = _w3[2]; - wf_t = _w3[3]; + w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16; + w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16; + w2_t = 0x00000080; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 72 * 8; + wf_t = 0; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); diff --git a/OpenCL/m21200_a3-pure.cl b/OpenCL/m21200_a3-pure.cl index a2206dcc8..b69aa225b 100644 --- a/OpenCL/m21200_a3-pure.cl +++ b/OpenCL/m21200_a3-pure.cl @@ -12,7 +12,6 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" -#include "inc_hash_sha1.cl" #endif #if VECT_SIZE == 1 @@ -69,38 +68,42 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ()) w[idx] = 
pws[gid].i[idx];
   }

-  const u32 salt_len = salt_bufs[salt_pos].salt_len;
-
-  u32 s[64] = { 0 };
-
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
-  {
-    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
-  }
-
-  sha1_ctx_t ctx0;
-
-  sha1_init (&ctx0);
-
-  sha1_update_swap (&ctx0, s, salt_len);
-
-  sha1_final (&ctx0);
+  /**
+   * salt
+   */

-  const u32x a0 = ctx0.h[0];
-  const u32x b0 = ctx0.h[1];
-  const u32x c0 = ctx0.h[2];
-  const u32x d0 = ctx0.h[3];
-  const u32x e0 = ctx0.h[4];
+  u32x salt_buf0[4];
+  u32x salt_buf1[4];
+  u32x salt_buf2[4];
+  u32x salt_buf3[4];
+
+  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+  salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
+  salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
+  salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
+  salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
+  salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
+  salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
+  salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
+  salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
+  salt_buf2[2] = 0;
+  salt_buf2[3] = 0;
+  salt_buf3[0] = 0;
+  salt_buf3[1] = 0;
+  salt_buf3[2] = 0;
+  salt_buf3[3] = 0;
+
+  md5_ctx_vector_t ctx0;
+
+  md5_init_vector (&ctx0);
+
+  md5_update_vector_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);

   /**
    * loop
    */

-  u32x _w0[4];
-  u32x _w1[4];
-  u32x _w2[4];
-  u32x _w3[4];
-
   u32x w0l = w[0];

   for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
@@ -113,78 +116,51 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ())

     md5_ctx_vector_t ctx1;

-    md5_init_vector (&ctx1);
-
+    md5_init_vector (&ctx1);
     md5_update_vector (&ctx1, w, pw_len);
-
-    md5_final_vector (&ctx1);
-
-    const u32x a1 = hc_swap32 (ctx1.h[0]);
-    const u32x b1 = hc_swap32 (ctx1.h[1]);
-    const u32x c1 = hc_swap32 (ctx1.h[2]);
-    const u32x d1 = hc_swap32 (ctx1.h[3]);
-
-    md5_ctx_vector_t ctx;
-
-    md5_init_vector (&ctx);
-
-    _w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
-    _w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
-    _w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
-    _w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
-    _w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
-    _w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
-    _w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
-    _w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
-    _w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
-    _w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
-
-    _w2[2] = 0;
-    _w2[3] = 0;
-    _w3[0] = 0;
-    _w3[1] = 0;
-    _w3[2] = 0;
-    _w3[3] = 0;
-
-    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
-
-    _w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
-    _w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
-    _w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
-    _w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
-    _w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
-    _w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
-    _w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
-    _w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
-
-    _w2[0] = 0;
-    _w2[1] = 0;
-    _w2[2] = 0;
-    _w2[3] = 0;
-    _w3[0] = 0;
-    _w3[1] = 0;
-    _w3[2] = 0;
-    _w3[3] = 0;
-
-    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
+    md5_final_vector (&ctx1);
+
+    const u32x a = hc_swap32 (ctx1.h[0]);
+    const u32x b = hc_swap32 (ctx1.h[1]);
+    const u32x c = hc_swap32 (ctx1.h[2]);
+    const u32x d = hc_swap32 (ctx1.h[3]);
+
+    // add md5_hex ($pass) to ctx0:
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+    u32x w2_t[4];
+    u32x w3_t[4];
+
+    w0_t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
+    w0_t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((a >> 0) & 255) << 16;
+    w0_t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
+    w0_t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((b >> 0) & 255) << 16;
+    w1_t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
+    w1_t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((c >> 0) & 255) << 16;
+    w1_t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
+    w1_t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((d >> 0) & 255) << 16;
+
+    w2_t[0] = 0;
+    w2_t[1] = 0;
+    w2_t[2] = 0;
+    w2_t[3] = 0;
+    w3_t[0] = 0;
+    w3_t[1] = 0;
+    w3_t[2] = 0;
+    w3_t[3] = 0;
+
+    md5_ctx_vector_t ctx = ctx0;
+
+    md5_update_vector_64 (&ctx, w0_t, w1_t, w2_t, w3_t, 32);

     md5_final_vector (&ctx);

@@ -251,38 +227,42 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ())
     w[idx] = pws[gid].i[idx];
   }

-  const u32 salt_len = salt_bufs[salt_pos].salt_len;
-
-  u32 s[64] = { 0 };
-
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
-  {
-    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
-  }
-
-  sha1_ctx_t ctx0;
-
-  sha1_init (&ctx0);
-
-  sha1_update_swap (&ctx0, s, salt_len);
-
-  sha1_final (&ctx0);
+  /**
+   * salt
+   */

-  const u32x a0 = ctx0.h[0];
-  const u32x b0 = ctx0.h[1];
-  const u32x c0 = ctx0.h[2];
-  const u32x d0 = ctx0.h[3];
-  const u32x e0 = ctx0.h[4];
+  u32x salt_buf0[4];
+  u32x salt_buf1[4];
+  u32x salt_buf2[4];
+  u32x salt_buf3[4];
+
+  salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
+  salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
+  salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
+  salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
+  salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
+  salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
+  salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
+  salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
+  salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
+  salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
+  salt_buf2[2] = 0;
+  salt_buf2[3] = 0;
+  salt_buf3[0] = 0;
+  salt_buf3[1] = 0;
+  salt_buf3[2] = 0;
+  salt_buf3[3] = 0;
+
+  md5_ctx_vector_t ctx0;
+
+  md5_init_vector (&ctx0);
+
+  md5_update_vector_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);

   /**
    * loop
    */

-  u32x _w0[4];
-  u32x _w1[4];
-  u32x _w2[4];
-  u32x _w3[4];
-
   u32x w0l = w[0];

   for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
@@ -295,78 +275,51 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ())

     md5_ctx_vector_t ctx1;

-    md5_init_vector (&ctx1);
-
+    md5_init_vector (&ctx1);
     md5_update_vector (&ctx1, w, pw_len);
-
-    md5_final_vector (&ctx1);
-
-    const u32x a1 = hc_swap32 (ctx1.h[0]);
-    const u32x b1 = hc_swap32 (ctx1.h[1]);
-    const u32x c1 = hc_swap32 (ctx1.h[2]);
-    const u32x d1 = hc_swap32 (ctx1.h[3]);
-
-    md5_ctx_vector_t ctx;
-
-    md5_init_vector (&ctx);
-
-    _w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
-    _w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
-    _w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
-    _w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
-    _w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
-    _w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
-    _w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
-    _w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
-    _w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
-    _w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
-
-    _w2[2] = 0;
-    _w2[3] = 0;
-    _w3[0] = 0;
-    _w3[1] = 0;
-    _w3[2] = 0;
-    _w3[3] = 0;
-
-    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
-
-    _w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
-    _w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
-    _w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
-    _w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
-    _w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
-    _w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
-    _w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
-           | uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
-    _w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
-           | uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
-
-    _w2[0] = 0;
-    _w2[1] = 0;
-    _w2[2] = 0;
-    _w2[3] = 0;
-    _w3[0] = 0;
-    _w3[1] = 0;
-    _w3[2] = 0;
-    _w3[3] = 0;
-
-    md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
+    md5_final_vector (&ctx1);
+
+    const u32x a = hc_swap32 (ctx1.h[0]);
+    const u32x b = hc_swap32 (ctx1.h[1]);
+    const u32x c = hc_swap32 (ctx1.h[2]);
+    const u32x d = hc_swap32 (ctx1.h[3]);
+
+    // add md5_hex ($pass) to ctx0:
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+    u32x w2_t[4];
+    u32x w3_t[4];
+
+    w0_t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((a >> 16) & 255) << 16;
+    w0_t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((a >> 0) & 255) << 16;
+    w0_t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((b >> 16) & 255) << 16;
+    w0_t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((b >> 0) & 255) << 16;
+    w1_t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((c >> 16) & 255) << 16;
+    w1_t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((c >> 0) & 255) << 16;
+    w1_t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
+            | uint_to_hex_lower8 ((d >> 16) & 255) << 16;
+    w1_t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
+            | uint_to_hex_lower8 ((d >> 0) & 255) << 16;
+
+    w2_t[0] = 0;
+    w2_t[1] = 0;
+    w2_t[2] = 0;
+    w2_t[3] = 0;
+    w3_t[0] = 0;
+    w3_t[1] = 0;
+    w3_t[2] = 0;
+    w3_t[3] = 0;
+
+    md5_ctx_vector_t ctx = ctx0;
+
+    md5_update_vector_64 (&ctx, w0_t, w1_t, w2_t, w3_t, 32);

     md5_final_vector (&ctx);

diff --git a/docs/changes.txt b/docs/changes.txt
index b89acb341..0383ad9dc 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -24,6 +24,7 @@

 - Compile macOS: Fixed makefile target 'clean' to correctly remove *.dSYM folders
 - Compile ZLIB: Fixed makefile include paths in case USE_SYSTEM_ZLIB is used
+- Hash-Mode 21200 (md5(sha1($salt).md5($pass))): Improved speed by using pre-computed SHA1
 - OpenCL Kernels: Added datatypes to literals of enum constants
 - OpenCL Kernels: Added pure kernels for hash-mode 600 (BLAKE2b-512)
 - OpenCL Runtime: Add some unstable warnings for some SHA512 based algorithms on AMD GPU on macOS

diff --git a/src/modules/module_21200.c b/src/modules/module_21200.c
index 00d1f58cb..14414e293 100644
--- a/src/modules/module_21200.c
+++ b/src/modules/module_21200.c
@@ -9,6 +9,7 @@
 #include "bitops.h"
 #include "convert.h"
 #include "shared.h"
+#include "emu_inc_hash_sha1.h"

 static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL;
 static const u32 DGST_POS0 = 0;
@@ -98,6 +99,37 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE

   if (parse_rc == false) return (PARSER_SALT_LENGTH);

+  // precompute sha1 ($salt) into salt->salt_buf_pc:
+
+  u32 s[64];
+
+  for (int i = 0; i < 64; i++)
+  {
+    s[i] = byte_swap_32 (salt->salt_buf[i]);
+  }
+
+  sha1_ctx_t sha1_ctx;
+
+  sha1_init (&sha1_ctx);
+  sha1_update (&sha1_ctx, s, salt->salt_len);
+  sha1_final (&sha1_ctx);
+
+  u32 pc[5]; // SHA1 digest: five 32-bit words
+
+  pc[0] = byte_swap_32 (sha1_ctx.h[0]);
+  pc[1] = byte_swap_32 (sha1_ctx.h[1]);
+  pc[2] = byte_swap_32 (sha1_ctx.h[2]);
+  pc[3] = byte_swap_32 (sha1_ctx.h[3]);
+  pc[4] = byte_swap_32 (sha1_ctx.h[4]);
+
+  u8 *salt_buf_pc = (u8 *) salt->salt_buf_pc;
+
+  u32_to_hex (pc[0], salt_buf_pc + 0);
+  u32_to_hex (pc[1], salt_buf_pc + 8);
+  u32_to_hex (pc[2], salt_buf_pc + 16);
+  u32_to_hex (pc[3], salt_buf_pc + 24);
+  u32_to_hex (pc[4], salt_buf_pc + 32);
+
   return (PARSER_OK);
 }

diff --git a/tools/test_modules/m21200.pm b/tools/test_modules/m21200.pm
index 3386d285d..350860bf3 100644
--- a/tools/test_modules/m21200.pm
+++ b/tools/test_modules/m21200.pm
@@ -11,7 +11,7 @@ use warnings;

 use Digest::MD5 qw (md5_hex);
 use Digest::SHA qw (sha1_hex);

-sub module_constraints { [[0, 256], [0, 256], [0, 55], [-1, -1], [-1, -1]] }
+sub module_constraints { [[0, 256], [0, 256], [0, 55], [0, 256], [-1, -1]] }

 sub module_generate_hash {
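
Note on the construction (illustration only, not part of the patch): mode 21200 is md5 (sha1_hex ($salt) . md5_hex ($pass)). Because sha1_hex ($salt) is constant per salt, it is now produced once on the host (the 40 hex bytes written to salt_buf_pc by module_hash_decode) and hashed into an MD5 context once per kernel invocation (ctx0); each password candidate then only copies ctx0 and appends its own 32-byte md5_hex ($pass). Below is a minimal standalone sketch of that split, assuming OpenSSL's one-shot SHA1 () / MD5 () helpers (link with -lcrypto); the salt and password values are made up for illustration:

  #include <stdio.h>
  #include <string.h>

  #include <openssl/md5.h>
  #include <openssl/sha.h>

  // hex-encode len bytes of in into out (lowercase, NUL-terminated)
  static void to_hex (const unsigned char *in, int len, char *out)
  {
    for (int i = 0; i < len; i++) sprintf (out + (i * 2), "%02x", in[i]);
  }

  int main (void)
  {
    const char *salt = "1234";    // made-up salt
    const char *pass = "hashcat"; // made-up password candidate

    // per-salt precompute: sha1_hex ($salt), 40 bytes (the role of salt_buf_pc)

    unsigned char dgst[SHA_DIGEST_LENGTH];

    char salt_pc[(SHA_DIGEST_LENGTH * 2) + 1];

    SHA1 ((const unsigned char *) salt, strlen (salt), dgst);

    to_hex (dgst, SHA_DIGEST_LENGTH, salt_pc);

    // per-candidate work: md5_hex ($pass), 32 bytes

    unsigned char md[MD5_DIGEST_LENGTH];

    char pass_hex[(MD5_DIGEST_LENGTH * 2) + 1];

    MD5 ((const unsigned char *) pass, strlen (pass), md);

    to_hex (md, MD5_DIGEST_LENGTH, pass_hex);

    // final digest: md5 over the 40 + 32 = 72 byte hex string

    char buf[72];

    memcpy (buf + 0, salt_pc, 40);
    memcpy (buf + 40, pass_hex, 32);

    MD5 ((const unsigned char *) buf, 72, md);

    char out[(MD5_DIGEST_LENGTH * 2) + 1];

    to_hex (md, MD5_DIGEST_LENGTH, out);

    printf ("%s\n", out); // compare against the mode 21200 digest

    return 0;
  }

Since 40 bytes is below the 64-byte MD5 block size, the 40-byte salt update only fills the context buffer and never triggers an MD5 transform by itself; the per-candidate saving in the kernels therefore comes from dropping the hex expansion of the SHA1 digest and the extra 40-byte update, and from removing the SHA1 computation from the kernels entirely.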