improved speed of -m 21200 by using pre-computed SHA1 hash

pull/2501/head
philsmd 4 years ago
parent 5f7b70bc42
commit 9bd77536c2
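Mode 21200 is md5(sha1($salt).md5($pass)). The salt is fixed for the whole attack, so sha1($salt), and with it the 40-character lowercase-hex string the outer MD5 consumes, is identical for every password candidate. This commit moves that work off the device: the hex digest is precomputed once on the host and shipped in salt_buf_pc, and the kernels below read salt_buf_pc[0..9] directly instead of running a full SHA-1 transform plus hex conversion per work-item.

A minimal host-side sketch of that precomputation (an illustration only, not the actual hashcat module code; it borrows OpenSSL's SHA1 for brevity, and the salt_buf_pc packing is assumed from how the kernels index it):

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <openssl/sha.h>

// Hypothetical helper: fill salt_buf_pc with the 40-byte lowercase-hex
// SHA-1 of the salt, packed as u32 words exactly as salt_buf_pc[0..9]
// is read in the kernels below (assumes a little-endian host, since the
// MD5 code consumes little-endian words).
static void precompute_salt_pc (const uint8_t *salt, size_t salt_len, uint32_t salt_buf_pc[16])
{
  uint8_t dgst[SHA_DIGEST_LENGTH]; // 20 raw digest bytes

  SHA1 (salt, salt_len, dgst);

  char hex[41]; // 40 hex chars, plus NUL scratch for snprintf

  for (int i = 0; i < SHA_DIGEST_LENGTH; i++)
  {
    snprintf (hex + i * 2, 3, "%02x", dgst[i]);
  }

  memset (salt_buf_pc, 0, 16 * sizeof (uint32_t));
  memcpy (salt_buf_pc, hex, 40); // words [0..9] carry the ASCII digest
}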

@@ -14,7 +14,6 @@
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
@@ -82,154 +81,25 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ())
u32 salt_buf0[4];
u32 salt_buf1[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = 0;
salt_buf1[2] = 0;
salt_buf1[3] = 0;
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len);
/**
* sha1(salt)
*/
u32x w0_t = hc_swap32 (salt_buf0[0]);
u32x w1_t = hc_swap32 (salt_buf0[1]);
u32x w2_t = hc_swap32 (salt_buf0[2]);
u32x w3_t = hc_swap32 (salt_buf0[3]);
u32x w4_t = hc_swap32 (salt_buf1[0]);
u32x w5_t = 0;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += SHA1M_A;
b += SHA1M_B;
c += SHA1M_C;
d += SHA1M_D;
e += SHA1M_E;
const u32x a0 = a;
const u32x b0 = b;
const u32x c0 = c;
const u32x d0 = d;
const u32x e0 = e;
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
/**
* loop
@@ -250,27 +120,27 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ())
* md5
*/
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = out_len * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = out_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@@ -356,84 +226,31 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ())
* md5
*/
w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 0;
// ctx len 40, pos 40
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
u32x _w0[4] = { 0 };
u32x _w1[4] = { 0 };
u32x _w2[4] = { 0 };
u32x _w3[4] = { 0 };
switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40);
w0_t |= w0[0];
w1_t |= w0[1];
w2_t |= w0[2];
w3_t |= w0[3];
w4_t |= w1[0];
w5_t |= w1[1];
w6_t |= w1[2];
w7_t |= w1[3];
w8_t |= w2[0];
w9_t |= w2[1];
wa_t |= w2[2];
wb_t |= w2[3];
wc_t |= w3[0];
wd_t |= w3[1];
we_t |= w3[2];
wf_t |= w3[3];
// combine sha1 ($salt) . md5 ($pass)
w0_t = salt_buf0[0];
w1_t = salt_buf0[1];
w2_t = salt_buf0[2];
w3_t = salt_buf0[3];
w4_t = salt_buf1[0];
w5_t = salt_buf1[1];
w6_t = salt_buf1[2];
w7_t = salt_buf1[3];
w8_t = salt_buf2[0];
w9_t = salt_buf2[1];
wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
// md5 transform
@@ -522,30 +339,6 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ())
digest[2] += c;
digest[3] += d;
w0[0] = _w0[0];
w0[1] = _w0[1];
w0[2] = _w0[2];
w0[3] = _w0[3];
w1[0] = _w1[0];
w1[1] = _w1[1];
w1[2] = _w1[2];
w1[3] = _w1[3];
w2[0] = _w2[0];
w2[1] = _w2[1];
w2[2] = _w2[2];
w2[3] = _w2[3];
w3[0] = _w3[0];
w3[1] = _w3[1];
w3[2] = _w3[2];
w3[3] = _w3[3];
// ctx len 72, pos 8
append_0x80_4x4 (w0, w1, w2, w3, 8);
w3[2] = 72 * 8;
w3[3] = 0;
// md5 final transform
a = digest[0];
@@ -553,22 +346,24 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ())
c = digest[2];
d = digest[3];
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = w3[2];
wf_t = w3[3];
w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2_t = 0x00000080;
w3_t = 0;
w4_t = 0;
w5_t = 0;
w6_t = 0;
w7_t = 0;
w8_t = 0;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 72 * 8;
wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@@ -708,154 +503,25 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ())
u32 salt_buf0[4];
u32 salt_buf1[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = 0;
salt_buf1[2] = 0;
salt_buf1[3] = 0;
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len);
/**
* sha1(salt)
*/
u32x w0_t = hc_swap32 (salt_buf0[0]);
u32x w1_t = hc_swap32 (salt_buf0[1]);
u32x w2_t = hc_swap32 (salt_buf0[2]);
u32x w3_t = hc_swap32 (salt_buf0[3]);
u32x w4_t = hc_swap32 (salt_buf1[0]);
u32x w5_t = 0;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += SHA1M_A;
b += SHA1M_B;
c += SHA1M_C;
d += SHA1M_D;
e += SHA1M_E;
const u32x a0 = a;
const u32x b0 = b;
const u32x c0 = c;
const u32x d0 = d;
const u32x e0 = e;
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
/**
* digest
@@ -888,27 +554,27 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ())
* md5
*/
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = out_len * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = out_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@@ -994,84 +660,31 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ())
* md5
*/
w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 0;
// ctx len 40, pos 40
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
u32x _w0[4] = { 0 };
u32x _w1[4] = { 0 };
u32x _w2[4] = { 0 };
u32x _w3[4] = { 0 };
switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40);
w0_t |= w0[0];
w1_t |= w0[1];
w2_t |= w0[2];
w3_t |= w0[3];
w4_t |= w1[0];
w5_t |= w1[1];
w6_t |= w1[2];
w7_t |= w1[3];
w8_t |= w2[0];
w9_t |= w2[1];
wa_t |= w2[2];
wb_t |= w2[3];
wc_t |= w3[0];
wd_t |= w3[1];
we_t |= w3[2];
wf_t |= w3[3];
// combine sha1 ($salt) . md5 ($pass)
w0_t = salt_buf0[0];
w1_t = salt_buf0[1];
w2_t = salt_buf0[2];
w3_t = salt_buf0[3];
w4_t = salt_buf1[0];
w5_t = salt_buf1[1];
w6_t = salt_buf1[2];
w7_t = salt_buf1[3];
w8_t = salt_buf2[0];
w9_t = salt_buf2[1];
wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
// md5 transform
@@ -1160,30 +773,6 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ())
digest[2] += c;
digest[3] += d;
w0[0] = _w0[0];
w0[1] = _w0[1];
w0[2] = _w0[2];
w0[3] = _w0[3];
w1[0] = _w1[0];
w1[1] = _w1[1];
w1[2] = _w1[2];
w1[3] = _w1[3];
w2[0] = _w2[0];
w2[1] = _w2[1];
w2[2] = _w2[2];
w2[3] = _w2[3];
w3[0] = _w3[0];
w3[1] = _w3[1];
w3[2] = _w3[2];
w3[3] = _w3[3];
// ctx len 72, pos 8
append_0x80_4x4 (w0, w1, w2, w3, 8);
w3[2] = 72 * 8;
w3[3] = 0;
// md5 final transform
a = digest[0];
@@ -1191,22 +780,24 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ())
c = digest[2];
d = digest[3];
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = w3[2];
wf_t = w3[3];
w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2_t = 0x00000080;
w3_t = 0;
w4_t = 0;
w5_t = 0;
w6_t = 0;
w7_t = 0;
w8_t = 0;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 72 * 8;
wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
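The pure kernels (next file, the variant built on inc_rp.cl and inc_scalar.cl) apply the same idea in a different shape: instead of filling raw message words, they absorb the precomputed hex digest into an MD5 context once, before the candidate loop, and clone that context per candidate. Condensed from the diff that follows:

md5_ctx_t ctx0;

md5_init (&ctx0);
md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40); // hex (sha1 ($salt)), loaded from salt_buf_pc

for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
  md5_ctx_t ctx = ctx0; // resume from the cached salt state

  md5_update_64 (&ctx, w0, w1, w2, w3, 32); // hex (md5 ($pass))

  md5_final (&ctx);
}

The in-kernel SHA-1 over the salt disappears entirely, and the 40-byte MD5 prefix is now absorbed once per work-item instead of once per candidate.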

@@ -14,7 +14,6 @@
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
@@ -64,38 +63,42 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ())
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
/**
* salt
*/
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
@@ -110,61 +113,34 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ())
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
const u32 a = hc_swap32 (ctx1.h[0]);
const u32 b = hc_swap32 (ctx1.h[1]);
const u32 c = hc_swap32 (ctx1.h[2]);
const u32 d = hc_swap32 (ctx1.h[3]);
// add md5_hex ($pass) to ctx0:
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
@@ -175,6 +151,8 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ())
w3[2] = 0;
w3[3] = 0;
md5_ctx_t ctx = ctx0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
@@ -235,38 +213,42 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ())
COPY_PW (pws[gid]);
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
/**
* salt
*/
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
/**
* loop
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
pw_t tmp = PASTE_PW;
@@ -281,61 +263,34 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ())
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
const u32 a = hc_swap32 (ctx1.h[0]);
const u32 b = hc_swap32 (ctx1.h[1]);
const u32 c = hc_swap32 (ctx1.h[2]);
const u32 d = hc_swap32 (ctx1.h[3]);
// add md5_hex ($pass) to ctx0:
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
@@ -346,6 +301,8 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ())
w3[2] = 0;
w3[3] = 0;
md5_ctx_t ctx = ctx0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
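The combinator kernels (KERN_ATTR_BASIC, next file) get the same rewrite; the only visible body difference in this diff is pw_len in place of out_len in the length word of the inner md5($pass). For reference, the outer MD5 message the optimized kernels assemble looks like this (a summary of the word assignments above, not new code):

/*
 * outer MD5 message: sha1_hex ($salt) . md5_hex ($pass) = 40 + 32 = 72 bytes
 *
 * block 1: words 0..9 = salt_buf_pc[0..9]           (40 bytes of salt hex)
 *          wa_t..wf_t = first 24 bytes of pass hex  (a1, b1, c1 hexified)
 * block 2: w0_t, w1_t = last 8 bytes of pass hex    (d1 hexified)
 *          w2_t       = 0x00000080                  (padding byte)
 *          we_t       = 72 * 8                      (message length in bits)
 */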

@@ -12,7 +12,6 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
@@ -80,154 +79,25 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ())
u32 salt_buf0[4];
u32 salt_buf1[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = 0;
salt_buf1[2] = 0;
salt_buf1[3] = 0;
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len);
/**
* sha1(salt)
*/
u32x w0_t = hc_swap32 (salt_buf0[0]);
u32x w1_t = hc_swap32 (salt_buf0[1]);
u32x w2_t = hc_swap32 (salt_buf0[2]);
u32x w3_t = hc_swap32 (salt_buf0[3]);
u32x w4_t = hc_swap32 (salt_buf1[0]);
u32x w5_t = 0;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += SHA1M_A;
b += SHA1M_B;
c += SHA1M_C;
d += SHA1M_D;
e += SHA1M_E;
const u32x a0 = a;
const u32x b0 = b;
const u32x c0 = c;
const u32x d0 = d;
const u32x e0 = e;
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
/**
* loop
@@ -306,27 +176,27 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ())
* md5
*/
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = pw_len * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@@ -412,84 +282,31 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ())
* md5
*/
w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 0;
// ctx len 40, pos 40
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
u32x _w0[4] = { 0 };
u32x _w1[4] = { 0 };
u32x _w2[4] = { 0 };
u32x _w3[4] = { 0 };
switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40);
w0_t |= w0[0];
w1_t |= w0[1];
w2_t |= w0[2];
w3_t |= w0[3];
w4_t |= w1[0];
w5_t |= w1[1];
w6_t |= w1[2];
w7_t |= w1[3];
w8_t |= w2[0];
w9_t |= w2[1];
wa_t |= w2[2];
wb_t |= w2[3];
wc_t |= w3[0];
wd_t |= w3[1];
we_t |= w3[2];
wf_t |= w3[3];
// combine sha1 ($salt) . md5 ($pass)
w0_t = salt_buf0[0];
w1_t = salt_buf0[1];
w2_t = salt_buf0[2];
w3_t = salt_buf0[3];
w4_t = salt_buf1[0];
w5_t = salt_buf1[1];
w6_t = salt_buf1[2];
w7_t = salt_buf1[3];
w8_t = salt_buf2[0];
w9_t = salt_buf2[1];
wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
// md5 transform
@@ -578,30 +395,6 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ())
digest[2] += c;
digest[3] += d;
w0[0] = _w0[0];
w0[1] = _w0[1];
w0[2] = _w0[2];
w0[3] = _w0[3];
w1[0] = _w1[0];
w1[1] = _w1[1];
w1[2] = _w1[2];
w1[3] = _w1[3];
w2[0] = _w2[0];
w2[1] = _w2[1];
w2[2] = _w2[2];
w2[3] = _w2[3];
w3[0] = _w3[0];
w3[1] = _w3[1];
w3[2] = _w3[2];
w3[3] = _w3[3];
// ctx len 72, pos 8
append_0x80_4x4 (w0, w1, w2, w3, 8);
w3[2] = 72 * 8;
w3[3] = 0;
// md5 final transform
a = digest[0];
@@ -609,22 +402,24 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ())
c = digest[2];
d = digest[3];
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = w3[2];
wf_t = w3[3];
w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2_t = 0x00000080;
w3_t = 0;
w4_t = 0;
w5_t = 0;
w6_t = 0;
w7_t = 0;
w8_t = 0;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 72 * 8;
wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@@ -764,154 +559,25 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ())
u32 salt_buf0[4];
u32 salt_buf1[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = 0;
salt_buf1[2] = 0;
salt_buf1[3] = 0;
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len);
/**
* sha1(salt)
*/
u32x w0_t = hc_swap32 (salt_buf0[0]);
u32x w1_t = hc_swap32 (salt_buf0[1]);
u32x w2_t = hc_swap32 (salt_buf0[2]);
u32x w3_t = hc_swap32 (salt_buf0[3]);
u32x w4_t = hc_swap32 (salt_buf1[0]);
u32x w5_t = 0;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += SHA1M_A;
b += SHA1M_B;
c += SHA1M_C;
d += SHA1M_D;
e += SHA1M_E;
const u32x a0 = a;
const u32x b0 = b;
const u32x c0 = c;
const u32x d0 = d;
const u32x e0 = e;
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
/**
* digest
@ -1002,27 +668,27 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ())
* md5
*/
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = pw_len * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -1108,84 +774,31 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ())
* md5
*/
w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 0;
// ctx len 40, pos 40
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
u32x _w0[4] = { 0 };
u32x _w1[4] = { 0 };
u32x _w2[4] = { 0 };
u32x _w3[4] = { 0 };
switch_buffer_by_offset_carry_le (w0, w1, w2, w3, _w0, _w1, _w2, _w3, 40);
w0_t |= w0[0];
w1_t |= w0[1];
w2_t |= w0[2];
w3_t |= w0[3];
w4_t |= w1[0];
w5_t |= w1[1];
w6_t |= w1[2];
w7_t |= w1[3];
w8_t |= w2[0];
w9_t |= w2[1];
wa_t |= w2[2];
wb_t |= w2[3];
wc_t |= w3[0];
wd_t |= w3[1];
we_t |= w3[2];
wf_t |= w3[3];
// combine sha1 ($salt) . md5 ($pass)
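// the ten salt words below hold the 40-byte lowercase hex of sha1 ($salt),
// precomputed on the host into salt_buf_pc; no SHA1 rounds run in the kernel anymore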
w0_t = salt_buf0[0];
w1_t = salt_buf0[1];
w2_t = salt_buf0[2];
w3_t = salt_buf0[3];
w4_t = salt_buf1[0];
w5_t = salt_buf1[1];
w6_t = salt_buf1[2];
w7_t = salt_buf1[3];
w8_t = salt_buf2[0];
w9_t = salt_buf2[1];
wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
// md5 transform
@ -1274,30 +887,6 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ())
digest[2] += c;
digest[3] += d;
w0[0] = _w0[0];
w0[1] = _w0[1];
w0[2] = _w0[2];
w0[3] = _w0[3];
w1[0] = _w1[0];
w1[1] = _w1[1];
w1[2] = _w1[2];
w1[3] = _w1[3];
w2[0] = _w2[0];
w2[1] = _w2[1];
w2[2] = _w2[2];
w2[3] = _w2[3];
w3[0] = _w3[0];
w3[1] = _w3[1];
w3[2] = _w3[2];
w3[3] = _w3[3];
// ctx len 72, pos 8
append_0x80_4x4 (w0, w1, w2, w3, 8);
w3[2] = 72 * 8;
w3[3] = 0;
// md5 final transform
a = digest[0];
@ -1305,22 +894,24 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ())
c = digest[2];
d = digest[3];
w0_t = w0[0];
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = w3[2];
wf_t = w3[3];
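// final MD5 block: the message is 40 (hex sha1 salt) + 32 (hex md5 pass) = 72 bytes,
// so only the last 8 bytes (d1 as hex) spill into this block, followed by the 0x80
// padding byte and the 576-bit (72 * 8) message length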
w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2_t = 0x00000080;
w3_t = 0;
w4_t = 0;
w5_t = 0;
w6_t = 0;
w7_t = 0;
w8_t = 0;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 72 * 8;
wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -12,7 +12,6 @@
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
@ -60,29 +59,6 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx11;
md5_init (&ctx11);
@ -90,13 +66,40 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
/**
* loop
* salt
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
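// ctx0 is primed once with the 40-byte hex of sha1 ($salt); the loop below only
// has to absorb the 32-byte md5_hex ($pass) per candidate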
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
@ -106,61 +109,34 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
const u32 a = hc_swap32 (ctx1.h[0]);
const u32 b = hc_swap32 (ctx1.h[1]);
const u32 c = hc_swap32 (ctx1.h[2]);
const u32 d = hc_swap32 (ctx1.h[3]);
// add md5_hex ($pass) to ctx0:
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
@ -171,6 +147,8 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
w3[2] = 0;
w3[3] = 0;
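// continue from the salt-primed context rather than rehashing the hex salt for
// every candidate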
md5_ctx_t ctx = ctx0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
@ -229,29 +207,6 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
* base
*/
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
const u32 a0 = ctx0.h[0];
const u32 b0 = ctx0.h[1];
const u32 c0 = ctx0.h[2];
const u32 d0 = ctx0.h[3];
const u32 e0 = ctx0.h[4];
md5_ctx_t ctx11;
md5_init (&ctx11);
@ -259,13 +214,40 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
md5_update_global (&ctx11, pws[gid].i, pws[gid].pw_len);
/**
* loop
* salt
*/
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
u32 salt_buf0[4];
u32 salt_buf1[4];
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
md5_ctx_t ctx0;
md5_init (&ctx0);
md5_update_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
@ -275,61 +257,34 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
md5_final (&ctx1);
const u32 a1 = hc_swap32 (ctx1.h[0]);
const u32 b1 = hc_swap32 (ctx1.h[1]);
const u32 c1 = hc_swap32 (ctx1.h[2]);
const u32 d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_t ctx;
md5_init (&ctx);
w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_update_64 (&ctx, w0, w1, w2, w3, 40);
w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
const u32 a = hc_swap32 (ctx1.h[0]);
const u32 b = hc_swap32 (ctx1.h[1]);
const u32 c = hc_swap32 (ctx1.h[2]);
const u32 d = hc_swap32 (ctx1.h[3]);
// add md5_hex ($pass) to ctx0:
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2[0] = 0;
w2[1] = 0;
@ -340,6 +295,8 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
w3[2] = 0;
w3[3] = 0;
md5_ctx_t ctx = ctx0;
md5_update_64 (&ctx, w0, w1, w2, w3, 32);
md5_final (&ctx);
@ -12,7 +12,6 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
@ -42,154 +41,25 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
u32 salt_buf0[4];
u32 salt_buf1[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = 0;
salt_buf1[2] = 0;
salt_buf1[3] = 0;
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len);
/**
* sha1(salt)
*/
u32x w0_t = hc_swap32 (salt_buf0[0]);
u32x w1_t = hc_swap32 (salt_buf0[1]);
u32x w2_t = hc_swap32 (salt_buf0[2]);
u32x w3_t = hc_swap32 (salt_buf0[3]);
u32x w4_t = hc_swap32 (salt_buf1[0]);
u32x w5_t = 0;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += SHA1M_A;
b += SHA1M_B;
c += SHA1M_C;
d += SHA1M_D;
e += SHA1M_E;
const u32x a0 = a;
const u32x b0 = b;
const u32x c0 = c;
const u32x d0 = d;
const u32x e0 = e;
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
/**
* loop
@ -207,27 +77,27 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
* md5
*/
w0_t = w0lr;
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = pw_len * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
u32x w0_t = w0lr;
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -313,89 +183,31 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
* md5
*/
w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 0;
// ctx len 40, pos 40
u32x _w0[4] = { 0 };
u32x _w1[4] = { 0 };
u32x _w2[4] = { 0 };
u32x _w3[4] = { 0 };
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
u32x _c0[4] = { 0 };
u32x _c1[4] = { 0 };
u32x _c2[4] = { 0 };
u32x _c3[4] = { 0 };
switch_buffer_by_offset_carry_le (_w0, _w1, _w2, _w3, _c0, _c1, _c2, _c3, 40);
w0_t |= _w0[0];
w1_t |= _w0[1];
w2_t |= _w0[2];
w3_t |= _w0[3];
w4_t |= _w1[0];
w5_t |= _w1[1];
w6_t |= _w1[2];
w7_t |= _w1[3];
w8_t |= _w2[0];
w9_t |= _w2[1];
wa_t |= _w2[2];
wb_t |= _w2[3];
wc_t |= _w3[0];
wd_t |= _w3[1];
we_t |= _w3[2];
wf_t |= _w3[3];
// combine sha1 ($salt) . md5 ($pass)
w0_t = salt_buf0[0];
w1_t = salt_buf0[1];
w2_t = salt_buf0[2];
w3_t = salt_buf0[3];
w4_t = salt_buf1[0];
w5_t = salt_buf1[1];
w6_t = salt_buf1[2];
w7_t = salt_buf1[3];
w8_t = salt_buf2[0];
w9_t = salt_buf2[1];
wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
// md5 transform
@ -484,30 +296,6 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
digest[2] += c;
digest[3] += d;
_w0[0] = _c0[0];
_w0[1] = _c0[1];
_w0[2] = _c0[2];
_w0[3] = _c0[3];
_w1[0] = _c1[0];
_w1[1] = _c1[1];
_w1[2] = _c1[2];
_w1[3] = _c1[3];
_w2[0] = _c2[0];
_w2[1] = _c2[1];
_w2[2] = _c2[2];
_w2[3] = _c2[3];
_w3[0] = _c3[0];
_w3[1] = _c3[1];
_w3[2] = _c3[2];
_w3[3] = _c3[3];
// ctx len 72, pos 8
append_0x80_4x4 (_w0, _w1, _w2, _w3, 8);
_w3[2] = 72 * 8;
_w3[3] = 0;
// md5 final transform
a = digest[0];
@ -515,22 +303,24 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
c = digest[2];
d = digest[3];
w0_t = _w0[0];
w1_t = _w0[1];
w2_t = _w0[2];
w3_t = _w0[3];
w4_t = _w1[0];
w5_t = _w1[1];
w6_t = _w1[2];
w7_t = _w1[3];
w8_t = _w2[0];
w9_t = _w2[1];
wa_t = _w2[2];
wb_t = _w2[3];
wc_t = _w3[0];
wd_t = _w3[1];
we_t = _w3[2];
wf_t = _w3[3];
w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2_t = 0x00000080;
w3_t = 0;
w4_t = 0;
w5_t = 0;
w6_t = 0;
w7_t = 0;
w8_t = 0;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 72 * 8;
wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -624,154 +414,25 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
u32 salt_buf0[4];
u32 salt_buf1[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
salt_buf1[1] = 0;
salt_buf1[2] = 0;
salt_buf1[3] = 0;
const u32 salt_len = salt_bufs[salt_pos].salt_len;
append_0x80_2x4_S (salt_buf0, salt_buf1, salt_len);
/**
* sha1(salt)
*/
u32x w0_t = hc_swap32 (salt_buf0[0]);
u32x w1_t = hc_swap32 (salt_buf0[1]);
u32x w2_t = hc_swap32 (salt_buf0[2]);
u32x w3_t = hc_swap32 (salt_buf0[3]);
u32x w4_t = hc_swap32 (salt_buf1[0]);
u32x w5_t = 0;
u32x w6_t = 0;
u32x w7_t = 0;
u32x w8_t = 0;
u32x w9_t = 0;
u32x wa_t = 0;
u32x wb_t = 0;
u32x wc_t = 0;
u32x wd_t = 0;
u32x we_t = 0;
u32x wf_t = salt_len * 8;
u32x a = SHA1M_A;
u32x b = SHA1M_B;
u32x c = SHA1M_C;
u32x d = SHA1M_D;
u32x e = SHA1M_E;
#undef K
#define K SHA1C00
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t);
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t);
SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t);
SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t);
SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t);
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t);
#undef K
#define K SHA1C01
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t);
#undef K
#define K SHA1C02
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t);
#undef K
#define K SHA1C03
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t);
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t);
w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t);
w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t);
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t);
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t);
w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t);
w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t);
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t);
w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t);
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t);
wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
a += SHA1M_A;
b += SHA1M_B;
c += SHA1M_C;
d += SHA1M_D;
e += SHA1M_E;
const u32x a0 = a;
const u32x b0 = b;
const u32x c0 = c;
const u32x d0 = d;
const u32x e0 = e;
u32 salt_buf2[4];
u32 salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
/**
* digest
@ -801,27 +462,27 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
* md5
*/
w0_t = w0lr;
w1_t = w0[1];
w2_t = w0[2];
w3_t = w0[3];
w4_t = w1[0];
w5_t = w1[1];
w6_t = w1[2];
w7_t = w1[3];
w8_t = w2[0];
w9_t = w2[1];
wa_t = w2[2];
wb_t = w2[3];
wc_t = w3[0];
wd_t = w3[1];
we_t = pw_len * 8;
wf_t = 0;
a = MD5M_A;
b = MD5M_B;
c = MD5M_C;
d = MD5M_D;
u32x w0_t = w0lr;
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = pw_len * 8;
u32x wf_t = 0;
u32x a = MD5M_A;
u32x b = MD5M_B;
u32x c = MD5M_C;
u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -907,89 +568,31 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
* md5
*/
w0_t = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
w2_t = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
w3_t = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
w4_t = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
w5_t = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
w6_t = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
w7_t = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
w8_t = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
w9_t = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 0;
wf_t = 0;
// ctx len 40, pos 40
u32x _w0[4] = { 0 };
u32x _w1[4] = { 0 };
u32x _w2[4] = { 0 };
u32x _w3[4] = { 0 };
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
u32x _c0[4] = { 0 };
u32x _c1[4] = { 0 };
u32x _c2[4] = { 0 };
u32x _c3[4] = { 0 };
switch_buffer_by_offset_carry_le (_w0, _w1, _w2, _w3, _c0, _c1, _c2, _c3, 40);
w0_t |= _w0[0];
w1_t |= _w0[1];
w2_t |= _w0[2];
w3_t |= _w0[3];
w4_t |= _w1[0];
w5_t |= _w1[1];
w6_t |= _w1[2];
w7_t |= _w1[3];
w8_t |= _w2[0];
w9_t |= _w2[1];
wa_t |= _w2[2];
wb_t |= _w2[3];
wc_t |= _w3[0];
wd_t |= _w3[1];
we_t |= _w3[2];
wf_t |= _w3[3];
// combine sha1 ($salt) . md5 ($pass)
w0_t = salt_buf0[0];
w1_t = salt_buf0[1];
w2_t = salt_buf0[2];
w3_t = salt_buf0[3];
w4_t = salt_buf1[0];
w5_t = salt_buf1[1];
w6_t = salt_buf1[2];
w7_t = salt_buf1[3];
w8_t = salt_buf2[0];
w9_t = salt_buf2[1];
wa_t = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
wb_t = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
wc_t = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
wd_t = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
we_t = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
wf_t = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
// md5 transform
@ -1078,30 +681,6 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
digest[2] += c;
digest[3] += d;
_w0[0] = _c0[0];
_w0[1] = _c0[1];
_w0[2] = _c0[2];
_w0[3] = _c0[3];
_w1[0] = _c1[0];
_w1[1] = _c1[1];
_w1[2] = _c1[2];
_w1[3] = _c1[3];
_w2[0] = _c2[0];
_w2[1] = _c2[1];
_w2[2] = _c2[2];
_w2[3] = _c2[3];
_w3[0] = _c3[0];
_w3[1] = _c3[1];
_w3[2] = _c3[2];
_w3[3] = _c3[3];
// ctx len 72, pos 8
append_0x80_4x4 (_w0, _w1, _w2, _w3, 8);
_w3[2] = 72 * 8;
_w3[3] = 0;
// md5 final transform
a = digest[0];
@ -1109,22 +688,24 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER
c = digest[2];
d = digest[3];
w0_t = _w0[0];
w1_t = _w0[1];
w2_t = _w0[2];
w3_t = _w0[3];
w4_t = _w1[0];
w5_t = _w1[1];
w6_t = _w1[2];
w7_t = _w1[3];
w8_t = _w2[0];
w9_t = _w2[1];
wa_t = _w2[2];
wb_t = _w2[3];
wc_t = _w3[0];
wd_t = _w3[1];
we_t = _w3[2];
wf_t = _w3[3];
w0_t = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
w1_t = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
w2_t = 0x00000080;
w3_t = 0;
w4_t = 0;
w5_t = 0;
w6_t = 0;
w7_t = 0;
w8_t = 0;
w9_t = 0;
wa_t = 0;
wb_t = 0;
wc_t = 0;
wd_t = 0;
we_t = 72 * 8;
wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
@ -12,7 +12,6 @@
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#if VECT_SIZE == 1
@ -69,38 +68,42 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ())
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
/**
* salt
*/
const u32x a0 = ctx0.h[0];
const u32x b0 = ctx0.h[1];
const u32x c0 = ctx0.h[2];
const u32x d0 = ctx0.h[3];
const u32x e0 = ctx0.h[4];
u32x salt_buf0[4];
u32x salt_buf1[4];
u32x salt_buf2[4];
u32x salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
@ -113,78 +116,51 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ())
md5_ctx_vector_t ctx1;
md5_init_vector (&ctx1);
md5_update_vector (&ctx1, w, pw_len);
md5_final_vector (&ctx1);
const u32x a1 = hc_swap32 (ctx1.h[0]);
const u32x b1 = hc_swap32 (ctx1.h[1]);
const u32x c1 = hc_swap32 (ctx1.h[2]);
const u32x d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
md5_final_vector (&ctx1);
const u32x a = hc_swap32 (ctx1.h[0]);
const u32x b = hc_swap32 (ctx1.h[1]);
const u32x c = hc_swap32 (ctx1.h[2]);
const u32x d = hc_swap32 (ctx1.h[3]);
// add md5_hex ($pass) to ctx0:
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0_t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0_t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0_t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1_t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1_t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1_t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1_t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
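// vector variant of the same reuse: ctx stays primed with hex (sha1 ($salt)), so
// each iteration only hashes the 32 hex bytes of md5 ($pass)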
md5_ctx_vector_t ctx = ctx0;
md5_update_vector_64 (&ctx, w0_t, w1_t, w2_t, w3_t, 32);
md5_final_vector (&ctx);
@ -251,38 +227,42 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ())
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
}
sha1_ctx_t ctx0;
sha1_init (&ctx0);
sha1_update_swap (&ctx0, s, salt_len);
sha1_final (&ctx0);
/**
* salt
*/
const u32x a0 = ctx0.h[0];
const u32x b0 = ctx0.h[1];
const u32x c0 = ctx0.h[2];
const u32x d0 = ctx0.h[3];
const u32x e0 = ctx0.h[4];
u32x salt_buf0[4];
u32x salt_buf1[4];
u32x salt_buf2[4];
u32x salt_buf3[4];
salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0];
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1];
salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2];
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3];
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4];
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5];
salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6];
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7];
salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8];
salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9];
salt_buf2[2] = 0;
salt_buf2[3] = 0;
salt_buf3[0] = 0;
salt_buf3[1] = 0;
salt_buf3[2] = 0;
salt_buf3[3] = 0;
md5_ctx_vector_t ctx0;
md5_init_vector (&ctx0);
md5_update_vector_64 (&ctx0, salt_buf0, salt_buf1, salt_buf2, salt_buf3, 40);
/**
* loop
*/
u32x _w0[4];
u32x _w1[4];
u32x _w2[4];
u32x _w3[4];
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
@ -295,78 +275,51 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ())
md5_ctx_vector_t ctx1;
md5_init_vector (&ctx1);
md5_update_vector (&ctx1, w, pw_len);
md5_final_vector (&ctx1);
const u32x a1 = hc_swap32 (ctx1.h[0]);
const u32x b1 = hc_swap32 (ctx1.h[1]);
const u32x c1 = hc_swap32 (ctx1.h[2]);
const u32x d1 = hc_swap32 (ctx1.h[3]);
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
_w0[0] = uint_to_hex_lower8 ((a0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a0 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b0 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c0 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d0 >> 0) & 255) << 16;
_w2[0] = uint_to_hex_lower8 ((e0 >> 24) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 16) & 255) << 16;
_w2[1] = uint_to_hex_lower8 ((e0 >> 8) & 255) << 0
| uint_to_hex_lower8 ((e0 >> 0) & 255) << 16;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 40);
_w0[0] = uint_to_hex_lower8 ((a1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 16) & 255) << 16;
_w0[1] = uint_to_hex_lower8 ((a1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((a1 >> 0) & 255) << 16;
_w0[2] = uint_to_hex_lower8 ((b1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 16) & 255) << 16;
_w0[3] = uint_to_hex_lower8 ((b1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((b1 >> 0) & 255) << 16;
_w1[0] = uint_to_hex_lower8 ((c1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 16) & 255) << 16;
_w1[1] = uint_to_hex_lower8 ((c1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((c1 >> 0) & 255) << 16;
_w1[2] = uint_to_hex_lower8 ((d1 >> 24) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 16) & 255) << 16;
_w1[3] = uint_to_hex_lower8 ((d1 >> 8) & 255) << 0
| uint_to_hex_lower8 ((d1 >> 0) & 255) << 16;
_w2[0] = 0;
_w2[1] = 0;
_w2[2] = 0;
_w2[3] = 0;
_w3[0] = 0;
_w3[1] = 0;
_w3[2] = 0;
_w3[3] = 0;
md5_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 32);
md5_final_vector (&ctx1);
const u32x a = hc_swap32 (ctx1.h[0]);
const u32x b = hc_swap32 (ctx1.h[1]);
const u32x c = hc_swap32 (ctx1.h[2]);
const u32x d = hc_swap32 (ctx1.h[3]);
// add md5_hex ($pass) to ctx0:
u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
w0_t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0
| uint_to_hex_lower8 ((a >> 0) & 255) << 16;
w0_t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0
| uint_to_hex_lower8 ((b >> 16) & 255) << 16;
w0_t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
w1_t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
w1_t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0
| uint_to_hex_lower8 ((c >> 0) & 255) << 16;
w1_t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0
| uint_to_hex_lower8 ((d >> 16) & 255) << 16;
w1_t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
md5_ctx_vector_t ctx = ctx0;
md5_update_vector_64 (&ctx, w0_t, w1_t, w2_t, w3_t, 32);
md5_final_vector (&ctx);
@ -24,6 +24,7 @@
- Compile macOS: Fixed makefile target 'clean' to correctly remove *.dSYM folders
- Compile ZLIB: Fixed makefile include paths in case USE_SYSTEM_ZLIB is used
- Hash-Mode 21200 (md5(sha1($salt).md5($pass))): Improved speed by using pre-computed SHA1
- OpenCL Kernels: Added datatypes to literals of enum constants
- OpenCL Kernels: Added pure kernels for hash-mode 600 (BLAKE2b-512)
- OpenCL Runtime: Added some unstable warnings for some SHA512 based algorithms on AMD GPU on macOS
@ -9,6 +9,7 @@
#include "bitops.h"
#include "convert.h"
#include "shared.h"
#include "emu_inc_hash_sha1.h"
static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL;
static const u32 DGST_POS0 = 0;
@ -98,6 +99,37 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
if (parse_rc == false) return (PARSER_SALT_LENGTH);
// precompute sha1 ($salt) into salt->salt_buf_pc:
u32 s[64];
for (int i = 0; i < 64; i++)
{
s[i] = byte_swap_32 (salt->salt_buf[i]);
}
sha1_ctx_t sha1_ctx;
sha1_init (&sha1_ctx);
sha1_update (&sha1_ctx, s, salt->salt_len);
sha1_final (&sha1_ctx);
u32 pc[5];
pc[0] = byte_swap_32 (sha1_ctx.h[0]);
pc[1] = byte_swap_32 (sha1_ctx.h[1]);
pc[2] = byte_swap_32 (sha1_ctx.h[2]);
pc[3] = byte_swap_32 (sha1_ctx.h[3]);
pc[4] = byte_swap_32 (sha1_ctx.h[4]);
u8 *salt_buf_pc = (u8 *) salt->salt_buf_pc;
u32_to_hex (pc[0], salt_buf_pc + 0);
u32_to_hex (pc[1], salt_buf_pc + 8);
u32_to_hex (pc[2], salt_buf_pc + 16);
u32_to_hex (pc[3], salt_buf_pc + 24);
u32_to_hex (pc[4], salt_buf_pc + 32);
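// salt_buf_pc now holds the 40 lowercase-hex ASCII bytes of sha1 ($salt), which the
// kernels read back as ten u32 words (salt_buf_pc[0] .. salt_buf_pc[9])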
return (PARSER_OK);
}
@ -11,7 +11,7 @@ use warnings;
use Digest::MD5 qw (md5_hex);
use Digest::SHA qw (sha1_hex);
sub module_constraints { [[0, 256], [0, 256], [0, 55], [-1, -1], [-1, -1]] }
sub module_constraints { [[0, 256], [0, 256], [0, 55], [0, 256], [-1, -1]] }
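# note: the fourth tuple (presumably the optimized-kernel salt constraint) is lifted
# from unsupported to [0, 256], since sha1 ($salt) is now precomputed host-side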
sub module_generate_hash
{