diff --git a/OpenCL/m04520_a1.cl b/OpenCL/m04520_a1.cl index 06f334fc2..ec18f15af 100644 --- a/OpenCL/m04520_a1.cl +++ b/OpenCL/m04520_a1.cl @@ -303,59 +303,37 @@ __kernel void m04520_m04 (__global pw_t *pws, __global const kernel_rule_t *rule d += SHA1M_D; e += SHA1M_E; - /** - * Prepend salt - */ - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - wa_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - wb_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - wc_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - wd_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - we_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - wf_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - const u32x e_sav = e; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap32 (w0_t); - w1_t = swap32 (w1_t); - w2_t = swap32 (w2_t); - w3_t = swap32 (w3_t); - w4_t = swap32 (w4_t); - w5_t = swap32 (w5_t); - w6_t = swap32 (w6_t); - w7_t = swap32 (w7_t); - w8_t = swap32 (w8_t); - w9_t = swap32 (w9_t); - wa_t = swap32 (wa_t); - wb_t = swap32 (wb_t); - wc_t = swap32 (wc_t); - wd_t = swap32 (wd_t); - we_t = swap32 (we_t); - wf_t = swap32 (wf_t); + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; a = SHA1M_A; b = SHA1M_B; @@ -363,138 +341,203 @@ __kernel void m04520_m04 (__global pw_t *pws, __global const kernel_rule_t *rule d = SHA1M_D; e = SHA1M_E; - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + if (salt_len > 14) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = swap32 (t0[0]); + w1_t = swap32 (t0[1]); + w2_t = swap32 (t0[2]); + w3_t = swap32 (t0[3]); + w4_t = swap32 (t1[0]); + w5_t = swap32 (t1[1]); + w6_t = swap32 (t1[2]); + w7_t = swap32 (t1[3]); + w8_t = swap32 (t2[0]); + w9_t = swap32 (t2[1]); + wa_t = swap32 (t2[2]); + wb_t = swap32 (t2[3]); + wc_t = swap32 (t3[0]); + wd_t = swap32 (t3[1]); + we_t = swap32 (t3[2]); + wf_t = swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = uint_to_hex_lower8 ((e_sav >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e_sav >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((e_sav >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e_sav >> 0) & 255) << 16; - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = swap32 (t0[0]); + w1_t = swap32 (t0[1]); + w2_t = swap32 (t0[2]); + w3_t = swap32 (t0[3]); + w4_t = swap32 (t1[0]); + w5_t = swap32 (t1[1]); + w6_t = swap32 (t1[2]); + w7_t = swap32 (t1[3]); + w8_t = swap32 (t2[0]); + w9_t = swap32 (t2[1]); + wa_t = swap32 (t2[2]); + wb_t = swap32 (t2[3]); + wc_t = swap32 (t3[0]); + wd_t = swap32 (t3[1]); we_t = 0; wf_t = (salt_len + 40) * 8; - w0_t = swap32 (w0_t); - w1_t = swap32 (w1_t); - #undef K #define K SHA1C00 @@ -898,59 +941,37 @@ __kernel void m04520_s04 (__global pw_t *pws, __global const kernel_rule_t *rule d += SHA1M_D; e += SHA1M_E; - /** - * Prepend salt - */ - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - wa_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - wb_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - wc_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - wd_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - we_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - wf_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - const u32x e_sav = e; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap32 (w0_t); - w1_t = swap32 (w1_t); - w2_t = swap32 (w2_t); - w3_t = swap32 (w3_t); - w4_t = swap32 (w4_t); - w5_t = swap32 (w5_t); - w6_t = swap32 (w6_t); - w7_t = swap32 (w7_t); - w8_t = swap32 (w8_t); - w9_t = swap32 (w9_t); - wa_t = swap32 (wa_t); - wb_t = swap32 (wb_t); - wc_t = swap32 (wc_t); - wd_t = swap32 (wd_t); - we_t = swap32 (we_t); - wf_t = swap32 (wf_t); + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; a = SHA1M_A; b = SHA1M_B; @@ -958,138 +979,203 @@ __kernel void m04520_s04 (__global pw_t *pws, __global const kernel_rule_t *rule d = SHA1M_D; e = SHA1M_E; - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + if (salt_len > 14) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = swap32 (t0[0]); + w1_t = swap32 (t0[1]); + w2_t = swap32 (t0[2]); + w3_t = swap32 (t0[3]); + w4_t = swap32 (t1[0]); + w5_t = swap32 (t1[1]); + w6_t = swap32 (t1[2]); + w7_t = swap32 (t1[3]); + w8_t = swap32 (t2[0]); + w9_t = swap32 (t2[1]); + wa_t = swap32 (t2[2]); + wb_t = swap32 (t2[3]); + wc_t = swap32 (t3[0]); + wd_t = swap32 (t3[1]); + we_t = swap32 (t3[2]); + wf_t = swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = uint_to_hex_lower8 ((e_sav >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e_sav >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((e_sav >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e_sav >> 0) & 255) << 16; - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = swap32 (t0[0]); + w1_t = swap32 (t0[1]); + w2_t = swap32 (t0[2]); + w3_t = swap32 (t0[3]); + w4_t = swap32 (t1[0]); + w5_t = swap32 (t1[1]); + w6_t = swap32 (t1[2]); + w7_t = swap32 (t1[3]); + w8_t = swap32 (t2[0]); + w9_t = swap32 (t2[1]); + wa_t = swap32 (t2[2]); + wb_t = swap32 (t2[3]); + wc_t = swap32 (t3[0]); + wd_t = swap32 (t3[1]); we_t = 0; wf_t = (salt_len + 40) * 8; - w0_t = swap32 (w0_t); - w1_t = swap32 (w1_t); - #undef K #define K SHA1C00