diff --git a/OpenCL/m13500_a0.cl b/OpenCL/m13500_a0.cl index 706948116..60a307db1 100644 --- a/OpenCL/m13500_a0.cl +++ b/OpenCL/m13500_a0.cl @@ -15,381 +15,6 @@ #include "inc_rp.cl" #include "inc_simd.cl" -void memcat64c_be (u32x block[16], const u32 offset, u32x carry[16]) -{ - const u32 mod = offset & 3; - const u32 div = offset / 4; - - u32 tmp00; - u32 tmp01; - u32 tmp02; - u32 tmp03; - u32 tmp04; - u32 tmp05; - u32 tmp06; - u32 tmp07; - u32 tmp08; - u32 tmp09; - u32 tmp10; - u32 tmp11; - u32 tmp12; - u32 tmp13; - u32 tmp14; - u32 tmp15; - u32 tmp16; - - #ifdef IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - - tmp00 = __byte_perm (carry[ 0], 0, selector); - tmp01 = __byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = __byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = __byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = __byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = __byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = __byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = __byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = __byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = __byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = __byte_perm (carry[10], carry[ 9], selector); - tmp11 = __byte_perm (carry[11], carry[10], selector); - tmp12 = __byte_perm (carry[12], carry[11], selector); - tmp13 = __byte_perm (carry[13], carry[12], selector); - tmp14 = __byte_perm (carry[14], carry[13], selector); - tmp15 = __byte_perm (carry[15], carry[14], selector); - tmp16 = __byte_perm ( 0, carry[15], selector); - #endif - - #if defined IS_AMD || defined IS_GENERIC - tmp00 = amd_bytealign ( 0, carry[ 0], offset); - tmp01 = amd_bytealign (carry[ 0], carry[ 1], offset); - tmp02 = amd_bytealign (carry[ 1], carry[ 2], offset); - tmp03 = amd_bytealign (carry[ 2], carry[ 3], offset); - tmp04 = amd_bytealign (carry[ 3], carry[ 4], offset); - tmp05 = amd_bytealign (carry[ 4], carry[ 5], offset); - tmp06 = amd_bytealign (carry[ 5], carry[ 6], offset); - tmp07 = amd_bytealign (carry[ 6], carry[ 7], offset); - tmp08 = amd_bytealign (carry[ 7], carry[ 8], offset); - tmp09 = amd_bytealign (carry[ 8], carry[ 9], offset); - tmp10 = amd_bytealign (carry[ 9], carry[10], offset); - tmp11 = amd_bytealign (carry[10], carry[11], offset); - tmp12 = amd_bytealign (carry[11], carry[12], offset); - tmp13 = amd_bytealign (carry[12], carry[13], offset); - tmp14 = amd_bytealign (carry[13], carry[14], offset); - tmp15 = amd_bytealign (carry[14], carry[15], offset); - tmp16 = amd_bytealign (carry[15], 0, offset); - #endif - - carry[ 0] = 0; - carry[ 1] = 0; - carry[ 2] = 0; - carry[ 3] = 0; - carry[ 4] = 0; - carry[ 5] = 0; - carry[ 6] = 0; - carry[ 7] = 0; - carry[ 8] = 0; - carry[ 9] = 0; - carry[10] = 0; - carry[11] = 0; - carry[12] = 0; - carry[13] = 0; - carry[14] = 0; - carry[15] = 0; - - switch (div) - { - case 0: block[ 0] |= tmp00; - block[ 1] = tmp01; - block[ 2] = tmp02; - block[ 3] = tmp03; - block[ 4] = tmp04; - block[ 5] = tmp05; - block[ 6] = tmp06; - block[ 7] = tmp07; - block[ 8] = tmp08; - block[ 9] = tmp09; - block[10] = tmp10; - block[11] = tmp11; - block[12] = tmp12; - block[13] = tmp13; - block[14] = tmp14; - block[15] = tmp15; - carry[ 0] = tmp16; - break; - case 1: block[ 1] |= tmp00; - block[ 2] = tmp01; - block[ 3] = tmp02; - block[ 4] = tmp03; - block[ 5] = tmp04; - block[ 6] = tmp05; - block[ 7] = tmp06; - block[ 8] = tmp07; - block[ 9] = tmp08; - block[10] = tmp09; - block[11] = tmp10; - block[12] = tmp11; - block[13] = tmp12; - block[14] = tmp13; - block[15] = tmp14; - carry[ 0] = tmp15; - carry[ 1] = tmp16; - break; - case 2: block[ 2] |= tmp00; - block[ 3] = tmp01; - block[ 4] = tmp02; - block[ 5] = tmp03; - block[ 6] = tmp04; - block[ 7] = tmp05; - block[ 8] = tmp06; - block[ 9] = tmp07; - block[10] = tmp08; - block[11] = tmp09; - block[12] = tmp10; - block[13] = tmp11; - block[14] = tmp12; - block[15] = tmp13; - carry[ 0] = tmp14; - carry[ 1] = tmp15; - carry[ 2] = tmp16; - break; - case 3: block[ 3] |= tmp00; - block[ 4] = tmp01; - block[ 5] = tmp02; - block[ 6] = tmp03; - block[ 7] = tmp04; - block[ 8] = tmp05; - block[ 9] = tmp06; - block[10] = tmp07; - block[11] = tmp08; - block[12] = tmp09; - block[13] = tmp10; - block[14] = tmp11; - block[15] = tmp12; - carry[ 0] = tmp13; - carry[ 1] = tmp14; - carry[ 2] = tmp15; - carry[ 3] = tmp16; - break; - case 4: block[ 4] |= tmp00; - block[ 5] = tmp01; - block[ 6] = tmp02; - block[ 7] = tmp03; - block[ 8] = tmp04; - block[ 9] = tmp05; - block[10] = tmp06; - block[11] = tmp07; - block[12] = tmp08; - block[13] = tmp09; - block[14] = tmp10; - block[15] = tmp11; - carry[ 0] = tmp12; - carry[ 1] = tmp13; - carry[ 2] = tmp14; - carry[ 3] = tmp15; - carry[ 4] = tmp16; - break; - case 5: block[ 5] |= tmp00; - block[ 6] = tmp01; - block[ 7] = tmp02; - block[ 8] = tmp03; - block[ 9] = tmp04; - block[10] = tmp05; - block[11] = tmp06; - block[12] = tmp07; - block[13] = tmp08; - block[14] = tmp09; - block[15] = tmp10; - carry[ 0] = tmp11; - carry[ 1] = tmp12; - carry[ 2] = tmp13; - carry[ 3] = tmp14; - carry[ 4] = tmp15; - carry[ 5] = tmp16; - break; - case 6: block[ 6] |= tmp00; - block[ 7] = tmp01; - block[ 8] = tmp02; - block[ 9] = tmp03; - block[10] = tmp04; - block[11] = tmp05; - block[12] = tmp06; - block[13] = tmp07; - block[14] = tmp08; - block[15] = tmp09; - carry[ 0] = tmp10; - carry[ 1] = tmp11; - carry[ 2] = tmp12; - carry[ 3] = tmp13; - carry[ 4] = tmp14; - carry[ 5] = tmp15; - carry[ 6] = tmp16; - break; - case 7: block[ 7] |= tmp00; - block[ 8] = tmp01; - block[ 9] = tmp02; - block[10] = tmp03; - block[11] = tmp04; - block[12] = tmp05; - block[13] = tmp06; - block[14] = tmp07; - block[15] = tmp08; - carry[ 0] = tmp09; - carry[ 1] = tmp10; - carry[ 2] = tmp11; - carry[ 3] = tmp12; - carry[ 4] = tmp13; - carry[ 5] = tmp14; - carry[ 6] = tmp15; - carry[ 7] = tmp16; - break; - case 8: block[ 8] |= tmp00; - block[ 9] = tmp01; - block[10] = tmp02; - block[11] = tmp03; - block[12] = tmp04; - block[13] = tmp05; - block[14] = tmp06; - block[15] = tmp07; - carry[ 0] = tmp08; - carry[ 1] = tmp09; - carry[ 2] = tmp10; - carry[ 3] = tmp11; - carry[ 4] = tmp12; - carry[ 5] = tmp13; - carry[ 6] = tmp14; - carry[ 7] = tmp15; - carry[ 8] = tmp16; - break; - case 9: block[ 9] |= tmp00; - block[10] = tmp01; - block[11] = tmp02; - block[12] = tmp03; - block[13] = tmp04; - block[14] = tmp05; - block[15] = tmp06; - carry[ 0] = tmp07; - carry[ 1] = tmp08; - carry[ 2] = tmp09; - carry[ 3] = tmp10; - carry[ 4] = tmp11; - carry[ 5] = tmp12; - carry[ 6] = tmp13; - carry[ 7] = tmp14; - carry[ 8] = tmp15; - carry[ 9] = tmp16; - break; - case 10: block[10] |= tmp00; - block[11] = tmp01; - block[12] = tmp02; - block[13] = tmp03; - block[14] = tmp04; - block[15] = tmp05; - carry[ 0] = tmp06; - carry[ 1] = tmp07; - carry[ 2] = tmp08; - carry[ 3] = tmp09; - carry[ 4] = tmp10; - carry[ 5] = tmp11; - carry[ 6] = tmp12; - carry[ 7] = tmp13; - carry[ 8] = tmp14; - carry[ 9] = tmp15; - carry[10] = tmp16; - break; - case 11: block[11] |= tmp00; - block[12] = tmp01; - block[13] = tmp02; - block[14] = tmp03; - block[15] = tmp04; - carry[ 0] = tmp05; - carry[ 1] = tmp06; - carry[ 2] = tmp07; - carry[ 3] = tmp08; - carry[ 4] = tmp09; - carry[ 5] = tmp10; - carry[ 6] = tmp11; - carry[ 7] = tmp12; - carry[ 8] = tmp13; - carry[ 9] = tmp14; - carry[10] = tmp15; - carry[11] = tmp16; - break; - case 12: block[12] |= tmp00; - block[13] = tmp01; - block[14] = tmp02; - block[15] = tmp03; - carry[ 0] = tmp04; - carry[ 1] = tmp05; - carry[ 2] = tmp06; - carry[ 3] = tmp07; - carry[ 4] = tmp08; - carry[ 5] = tmp09; - carry[ 6] = tmp10; - carry[ 7] = tmp11; - carry[ 8] = tmp12; - carry[ 9] = tmp13; - carry[10] = tmp14; - carry[11] = tmp15; - carry[12] = tmp16; - break; - case 13: block[13] |= tmp00; - block[14] = tmp01; - block[15] = tmp02; - carry[ 0] = tmp03; - carry[ 1] = tmp04; - carry[ 2] = tmp05; - carry[ 3] = tmp06; - carry[ 4] = tmp07; - carry[ 5] = tmp08; - carry[ 6] = tmp09; - carry[ 7] = tmp10; - carry[ 8] = tmp11; - carry[ 9] = tmp12; - carry[10] = tmp13; - carry[11] = tmp14; - carry[12] = tmp15; - carry[13] = tmp16; - break; - case 14: block[14] |= tmp00; - block[15] = tmp01; - carry[ 0] = tmp02; - carry[ 1] = tmp03; - carry[ 2] = tmp04; - carry[ 3] = tmp05; - carry[ 4] = tmp06; - carry[ 5] = tmp07; - carry[ 6] = tmp08; - carry[ 7] = tmp09; - carry[ 8] = tmp10; - carry[ 9] = tmp11; - carry[10] = tmp12; - carry[11] = tmp13; - carry[12] = tmp14; - carry[13] = tmp15; - carry[14] = tmp16; - break; - case 15: block[15] |= tmp00; - carry[ 0] = tmp01; - carry[ 1] = tmp02; - carry[ 2] = tmp03; - carry[ 3] = tmp04; - carry[ 4] = tmp05; - carry[ 5] = tmp06; - carry[ 6] = tmp07; - carry[ 7] = tmp08; - carry[ 8] = tmp09; - carry[ 9] = tmp10; - carry[10] = tmp11; - carry[11] = tmp12; - carry[12] = tmp13; - carry[13] = tmp14; - carry[14] = tmp15; - carry[15] = tmp16; - break; - } -} - __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** @@ -426,6 +51,8 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32 pc_offset = esalt_bufs[salt_pos].pc_offset; + const u32 pc_offset4 = pc_offset * 4; + u32 pc_digest[5]; pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0]; @@ -484,45 +111,46 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule * prepend salt -- can't stay outside the loop this time */ - u32x carry[16]; + w0[0] = swap32 (w0[0]); + w0[1] = swap32 (w0[1]); + w0[2] = swap32 (w0[2]); + w0[3] = swap32 (w0[3]); + w1[0] = swap32 (w1[0]); + w1[1] = swap32 (w1[1]); + w1[2] = swap32 (w1[2]); + w1[3] = swap32 (w1[3]); + w2[0] = swap32 (w2[0]); + w2[1] = swap32 (w2[1]); + w2[2] = swap32 (w2[2]); + w2[3] = swap32 (w2[3]); + w3[0] = swap32 (w3[0]); + w3[1] = swap32 (w3[1]); + w3[2] = swap32 (w3[2]); + w3[3] = swap32 (w3[3]); - carry[ 0] = swap32 (w0[0]); - carry[ 1] = swap32 (w0[1]); - carry[ 2] = swap32 (w0[2]); - carry[ 3] = swap32 (w0[3]); - carry[ 4] = swap32 (w1[0]); - carry[ 5] = swap32 (w1[1]); - carry[ 6] = swap32 (w1[2]); - carry[ 7] = swap32 (w1[3]); - carry[ 8] = swap32 (w2[0]); - carry[ 9] = swap32 (w2[1]); - carry[10] = swap32 (w2[2]); - carry[11] = swap32 (w2[3]); - carry[12] = swap32 (w3[0]); - carry[13] = swap32 (w3[1]); - carry[14] = swap32 (w3[2]); - carry[15] = swap32 (w3[3]); + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; - u32x w[16]; + switch_buffer_by_offset_carry_be (w0, w1, w2, w3, c0, c1, c2, c3, salt_len - pc_offset4); - w[ 0] = salt_buf0[0]; - w[ 1] = salt_buf0[1]; - w[ 2] = salt_buf0[2]; - w[ 3] = salt_buf0[3]; - w[ 4] = salt_buf1[0]; - w[ 5] = salt_buf1[1]; - w[ 6] = salt_buf1[2]; - w[ 7] = salt_buf1[3]; - w[ 8] = salt_buf2[0]; - w[ 9] = salt_buf2[1]; - w[10] = salt_buf2[2]; - w[11] = salt_buf2[3]; - w[12] = salt_buf3[0]; - w[13] = salt_buf3[1]; - w[14] = salt_buf3[2]; - w[15] = salt_buf3[3]; - - memcat64c_be (w, salt_len & 0x3f, carry); + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; u32x a = pc_digest[0]; u32x b = pc_digest[1]; @@ -530,24 +158,26 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x d = pc_digest[3]; u32x e = pc_digest[4]; - if (((salt_len & 0x3f) + out_len2) >= 56) + const u32 r = (salt_len - pc_offset4) + out_len2; + + if (r >= 56) { - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = w3[3]; #undef K #define K SHA1C00 @@ -651,42 +281,42 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule d += pc_digest[3]; e += pc_digest[4]; - w[ 0] = carry[ 0]; - w[ 1] = carry[ 1]; - w[ 2] = carry[ 2]; - w[ 3] = carry[ 3]; - w[ 4] = carry[ 4]; - w[ 5] = carry[ 5]; - w[ 6] = carry[ 6]; - w[ 7] = carry[ 7]; - w[ 8] = carry[ 8]; - w[ 9] = carry[ 9]; - w[10] = carry[10]; - w[11] = carry[11]; - w[12] = carry[12]; - w[13] = carry[13]; - w[14] = carry[14]; - w[15] = carry[15]; + w0[0] = c0[0]; + w0[1] = c0[1]; + w0[2] = c0[2]; + w0[3] = c0[3]; + w1[0] = c1[0]; + w1[1] = c1[1]; + w1[2] = c1[2]; + w1[3] = c1[3]; + w2[0] = c2[0]; + w2[1] = c2[1]; + w2[2] = c2[2]; + w2[3] = c2[3]; + w3[0] = c3[0]; + w3[1] = c3[1]; + w3[2] = c3[2]; + w3[3] = c3[3]; } /** * final sha1 */ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; u32x we_t = 0; u32x wf_t = out_salt_len * 8; @@ -846,6 +476,8 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32 pc_offset = esalt_bufs[salt_pos].pc_offset; + const u32 pc_offset4 = pc_offset * 4; + u32 pc_digest[5]; pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0]; @@ -916,45 +548,46 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule * prepend salt -- can't stay outside the loop this time */ - u32x carry[16]; + w0[0] = swap32 (w0[0]); + w0[1] = swap32 (w0[1]); + w0[2] = swap32 (w0[2]); + w0[3] = swap32 (w0[3]); + w1[0] = swap32 (w1[0]); + w1[1] = swap32 (w1[1]); + w1[2] = swap32 (w1[2]); + w1[3] = swap32 (w1[3]); + w2[0] = swap32 (w2[0]); + w2[1] = swap32 (w2[1]); + w2[2] = swap32 (w2[2]); + w2[3] = swap32 (w2[3]); + w3[0] = swap32 (w3[0]); + w3[1] = swap32 (w3[1]); + w3[2] = swap32 (w3[2]); + w3[3] = swap32 (w3[3]); - carry[ 0] = swap32 (w0[0]); - carry[ 1] = swap32 (w0[1]); - carry[ 2] = swap32 (w0[2]); - carry[ 3] = swap32 (w0[3]); - carry[ 4] = swap32 (w1[0]); - carry[ 5] = swap32 (w1[1]); - carry[ 6] = swap32 (w1[2]); - carry[ 7] = swap32 (w1[3]); - carry[ 8] = swap32 (w2[0]); - carry[ 9] = swap32 (w2[1]); - carry[10] = swap32 (w2[2]); - carry[11] = swap32 (w2[3]); - carry[12] = swap32 (w3[0]); - carry[13] = swap32 (w3[1]); - carry[14] = swap32 (w3[2]); - carry[15] = swap32 (w3[3]); + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; - u32x w[16]; + switch_buffer_by_offset_carry_be (w0, w1, w2, w3, c0, c1, c2, c3, salt_len - pc_offset4); - w[ 0] = salt_buf0[0]; - w[ 1] = salt_buf0[1]; - w[ 2] = salt_buf0[2]; - w[ 3] = salt_buf0[3]; - w[ 4] = salt_buf1[0]; - w[ 5] = salt_buf1[1]; - w[ 6] = salt_buf1[2]; - w[ 7] = salt_buf1[3]; - w[ 8] = salt_buf2[0]; - w[ 9] = salt_buf2[1]; - w[10] = salt_buf2[2]; - w[11] = salt_buf2[3]; - w[12] = salt_buf3[0]; - w[13] = salt_buf3[1]; - w[14] = salt_buf3[2]; - w[15] = salt_buf3[3]; - - memcat64c_be (w, salt_len & 0x3f, carry); + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; u32x a = pc_digest[0]; u32x b = pc_digest[1]; @@ -962,24 +595,26 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x d = pc_digest[3]; u32x e = pc_digest[4]; - if (((salt_len & 0x3f) + out_len2) >= 56) + const u32 r = (salt_len - pc_offset4) + out_len2; + + if (r >= 56) { - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = w3[3]; #undef K #define K SHA1C00 @@ -1083,42 +718,42 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule d += pc_digest[3]; e += pc_digest[4]; - w[ 0] = carry[ 0]; - w[ 1] = carry[ 1]; - w[ 2] = carry[ 2]; - w[ 3] = carry[ 3]; - w[ 4] = carry[ 4]; - w[ 5] = carry[ 5]; - w[ 6] = carry[ 6]; - w[ 7] = carry[ 7]; - w[ 8] = carry[ 8]; - w[ 9] = carry[ 9]; - w[10] = carry[10]; - w[11] = carry[11]; - w[12] = carry[12]; - w[13] = carry[13]; - w[14] = carry[14]; - w[15] = carry[15]; + w0[0] = c0[0]; + w0[1] = c0[1]; + w0[2] = c0[2]; + w0[3] = c0[3]; + w1[0] = c1[0]; + w1[1] = c1[1]; + w1[2] = c1[2]; + w1[3] = c1[3]; + w2[0] = c2[0]; + w2[1] = c2[1]; + w2[2] = c2[2]; + w2[3] = c2[3]; + w3[0] = c3[0]; + w3[1] = c3[1]; + w3[2] = c3[2]; + w3[3] = c3[3]; } /** * final sha1 */ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; u32x we_t = 0; u32x wf_t = out_salt_len * 8; diff --git a/OpenCL/m13500_a1.cl b/OpenCL/m13500_a1.cl index 82ed51834..b08309b32 100644 --- a/OpenCL/m13500_a1.cl +++ b/OpenCL/m13500_a1.cl @@ -13,381 +13,6 @@ #include "inc_common.cl" #include "inc_simd.cl" -void memcat64c_be (u32x block[16], const u32 offset, u32x carry[16]) -{ - const u32 mod = offset & 3; - const u32 div = offset / 4; - - u32 tmp00; - u32 tmp01; - u32 tmp02; - u32 tmp03; - u32 tmp04; - u32 tmp05; - u32 tmp06; - u32 tmp07; - u32 tmp08; - u32 tmp09; - u32 tmp10; - u32 tmp11; - u32 tmp12; - u32 tmp13; - u32 tmp14; - u32 tmp15; - u32 tmp16; - - #ifdef IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - - tmp00 = __byte_perm (carry[ 0], 0, selector); - tmp01 = __byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = __byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = __byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = __byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = __byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = __byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = __byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = __byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = __byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = __byte_perm (carry[10], carry[ 9], selector); - tmp11 = __byte_perm (carry[11], carry[10], selector); - tmp12 = __byte_perm (carry[12], carry[11], selector); - tmp13 = __byte_perm (carry[13], carry[12], selector); - tmp14 = __byte_perm (carry[14], carry[13], selector); - tmp15 = __byte_perm (carry[15], carry[14], selector); - tmp16 = __byte_perm ( 0, carry[15], selector); - #endif - - #if defined IS_AMD || defined IS_GENERIC - tmp00 = amd_bytealign ( 0, carry[ 0], offset); - tmp01 = amd_bytealign (carry[ 0], carry[ 1], offset); - tmp02 = amd_bytealign (carry[ 1], carry[ 2], offset); - tmp03 = amd_bytealign (carry[ 2], carry[ 3], offset); - tmp04 = amd_bytealign (carry[ 3], carry[ 4], offset); - tmp05 = amd_bytealign (carry[ 4], carry[ 5], offset); - tmp06 = amd_bytealign (carry[ 5], carry[ 6], offset); - tmp07 = amd_bytealign (carry[ 6], carry[ 7], offset); - tmp08 = amd_bytealign (carry[ 7], carry[ 8], offset); - tmp09 = amd_bytealign (carry[ 8], carry[ 9], offset); - tmp10 = amd_bytealign (carry[ 9], carry[10], offset); - tmp11 = amd_bytealign (carry[10], carry[11], offset); - tmp12 = amd_bytealign (carry[11], carry[12], offset); - tmp13 = amd_bytealign (carry[12], carry[13], offset); - tmp14 = amd_bytealign (carry[13], carry[14], offset); - tmp15 = amd_bytealign (carry[14], carry[15], offset); - tmp16 = amd_bytealign (carry[15], 0, offset); - #endif - - carry[ 0] = 0; - carry[ 1] = 0; - carry[ 2] = 0; - carry[ 3] = 0; - carry[ 4] = 0; - carry[ 5] = 0; - carry[ 6] = 0; - carry[ 7] = 0; - carry[ 8] = 0; - carry[ 9] = 0; - carry[10] = 0; - carry[11] = 0; - carry[12] = 0; - carry[13] = 0; - carry[14] = 0; - carry[15] = 0; - - switch (div) - { - case 0: block[ 0] |= tmp00; - block[ 1] = tmp01; - block[ 2] = tmp02; - block[ 3] = tmp03; - block[ 4] = tmp04; - block[ 5] = tmp05; - block[ 6] = tmp06; - block[ 7] = tmp07; - block[ 8] = tmp08; - block[ 9] = tmp09; - block[10] = tmp10; - block[11] = tmp11; - block[12] = tmp12; - block[13] = tmp13; - block[14] = tmp14; - block[15] = tmp15; - carry[ 0] = tmp16; - break; - case 1: block[ 1] |= tmp00; - block[ 2] = tmp01; - block[ 3] = tmp02; - block[ 4] = tmp03; - block[ 5] = tmp04; - block[ 6] = tmp05; - block[ 7] = tmp06; - block[ 8] = tmp07; - block[ 9] = tmp08; - block[10] = tmp09; - block[11] = tmp10; - block[12] = tmp11; - block[13] = tmp12; - block[14] = tmp13; - block[15] = tmp14; - carry[ 0] = tmp15; - carry[ 1] = tmp16; - break; - case 2: block[ 2] |= tmp00; - block[ 3] = tmp01; - block[ 4] = tmp02; - block[ 5] = tmp03; - block[ 6] = tmp04; - block[ 7] = tmp05; - block[ 8] = tmp06; - block[ 9] = tmp07; - block[10] = tmp08; - block[11] = tmp09; - block[12] = tmp10; - block[13] = tmp11; - block[14] = tmp12; - block[15] = tmp13; - carry[ 0] = tmp14; - carry[ 1] = tmp15; - carry[ 2] = tmp16; - break; - case 3: block[ 3] |= tmp00; - block[ 4] = tmp01; - block[ 5] = tmp02; - block[ 6] = tmp03; - block[ 7] = tmp04; - block[ 8] = tmp05; - block[ 9] = tmp06; - block[10] = tmp07; - block[11] = tmp08; - block[12] = tmp09; - block[13] = tmp10; - block[14] = tmp11; - block[15] = tmp12; - carry[ 0] = tmp13; - carry[ 1] = tmp14; - carry[ 2] = tmp15; - carry[ 3] = tmp16; - break; - case 4: block[ 4] |= tmp00; - block[ 5] = tmp01; - block[ 6] = tmp02; - block[ 7] = tmp03; - block[ 8] = tmp04; - block[ 9] = tmp05; - block[10] = tmp06; - block[11] = tmp07; - block[12] = tmp08; - block[13] = tmp09; - block[14] = tmp10; - block[15] = tmp11; - carry[ 0] = tmp12; - carry[ 1] = tmp13; - carry[ 2] = tmp14; - carry[ 3] = tmp15; - carry[ 4] = tmp16; - break; - case 5: block[ 5] |= tmp00; - block[ 6] = tmp01; - block[ 7] = tmp02; - block[ 8] = tmp03; - block[ 9] = tmp04; - block[10] = tmp05; - block[11] = tmp06; - block[12] = tmp07; - block[13] = tmp08; - block[14] = tmp09; - block[15] = tmp10; - carry[ 0] = tmp11; - carry[ 1] = tmp12; - carry[ 2] = tmp13; - carry[ 3] = tmp14; - carry[ 4] = tmp15; - carry[ 5] = tmp16; - break; - case 6: block[ 6] |= tmp00; - block[ 7] = tmp01; - block[ 8] = tmp02; - block[ 9] = tmp03; - block[10] = tmp04; - block[11] = tmp05; - block[12] = tmp06; - block[13] = tmp07; - block[14] = tmp08; - block[15] = tmp09; - carry[ 0] = tmp10; - carry[ 1] = tmp11; - carry[ 2] = tmp12; - carry[ 3] = tmp13; - carry[ 4] = tmp14; - carry[ 5] = tmp15; - carry[ 6] = tmp16; - break; - case 7: block[ 7] |= tmp00; - block[ 8] = tmp01; - block[ 9] = tmp02; - block[10] = tmp03; - block[11] = tmp04; - block[12] = tmp05; - block[13] = tmp06; - block[14] = tmp07; - block[15] = tmp08; - carry[ 0] = tmp09; - carry[ 1] = tmp10; - carry[ 2] = tmp11; - carry[ 3] = tmp12; - carry[ 4] = tmp13; - carry[ 5] = tmp14; - carry[ 6] = tmp15; - carry[ 7] = tmp16; - break; - case 8: block[ 8] |= tmp00; - block[ 9] = tmp01; - block[10] = tmp02; - block[11] = tmp03; - block[12] = tmp04; - block[13] = tmp05; - block[14] = tmp06; - block[15] = tmp07; - carry[ 0] = tmp08; - carry[ 1] = tmp09; - carry[ 2] = tmp10; - carry[ 3] = tmp11; - carry[ 4] = tmp12; - carry[ 5] = tmp13; - carry[ 6] = tmp14; - carry[ 7] = tmp15; - carry[ 8] = tmp16; - break; - case 9: block[ 9] |= tmp00; - block[10] = tmp01; - block[11] = tmp02; - block[12] = tmp03; - block[13] = tmp04; - block[14] = tmp05; - block[15] = tmp06; - carry[ 0] = tmp07; - carry[ 1] = tmp08; - carry[ 2] = tmp09; - carry[ 3] = tmp10; - carry[ 4] = tmp11; - carry[ 5] = tmp12; - carry[ 6] = tmp13; - carry[ 7] = tmp14; - carry[ 8] = tmp15; - carry[ 9] = tmp16; - break; - case 10: block[10] |= tmp00; - block[11] = tmp01; - block[12] = tmp02; - block[13] = tmp03; - block[14] = tmp04; - block[15] = tmp05; - carry[ 0] = tmp06; - carry[ 1] = tmp07; - carry[ 2] = tmp08; - carry[ 3] = tmp09; - carry[ 4] = tmp10; - carry[ 5] = tmp11; - carry[ 6] = tmp12; - carry[ 7] = tmp13; - carry[ 8] = tmp14; - carry[ 9] = tmp15; - carry[10] = tmp16; - break; - case 11: block[11] |= tmp00; - block[12] = tmp01; - block[13] = tmp02; - block[14] = tmp03; - block[15] = tmp04; - carry[ 0] = tmp05; - carry[ 1] = tmp06; - carry[ 2] = tmp07; - carry[ 3] = tmp08; - carry[ 4] = tmp09; - carry[ 5] = tmp10; - carry[ 6] = tmp11; - carry[ 7] = tmp12; - carry[ 8] = tmp13; - carry[ 9] = tmp14; - carry[10] = tmp15; - carry[11] = tmp16; - break; - case 12: block[12] |= tmp00; - block[13] = tmp01; - block[14] = tmp02; - block[15] = tmp03; - carry[ 0] = tmp04; - carry[ 1] = tmp05; - carry[ 2] = tmp06; - carry[ 3] = tmp07; - carry[ 4] = tmp08; - carry[ 5] = tmp09; - carry[ 6] = tmp10; - carry[ 7] = tmp11; - carry[ 8] = tmp12; - carry[ 9] = tmp13; - carry[10] = tmp14; - carry[11] = tmp15; - carry[12] = tmp16; - break; - case 13: block[13] |= tmp00; - block[14] = tmp01; - block[15] = tmp02; - carry[ 0] = tmp03; - carry[ 1] = tmp04; - carry[ 2] = tmp05; - carry[ 3] = tmp06; - carry[ 4] = tmp07; - carry[ 5] = tmp08; - carry[ 6] = tmp09; - carry[ 7] = tmp10; - carry[ 8] = tmp11; - carry[ 9] = tmp12; - carry[10] = tmp13; - carry[11] = tmp14; - carry[12] = tmp15; - carry[13] = tmp16; - break; - case 14: block[14] |= tmp00; - block[15] = tmp01; - carry[ 0] = tmp02; - carry[ 1] = tmp03; - carry[ 2] = tmp04; - carry[ 3] = tmp05; - carry[ 4] = tmp06; - carry[ 5] = tmp07; - carry[ 6] = tmp08; - carry[ 7] = tmp09; - carry[ 8] = tmp10; - carry[ 9] = tmp11; - carry[10] = tmp12; - carry[11] = tmp13; - carry[12] = tmp14; - carry[13] = tmp15; - carry[14] = tmp16; - break; - case 15: block[15] |= tmp00; - carry[ 0] = tmp01; - carry[ 1] = tmp02; - carry[ 2] = tmp03; - carry[ 3] = tmp04; - carry[ 4] = tmp05; - carry[ 5] = tmp06; - carry[ 6] = tmp07; - carry[ 7] = tmp08; - carry[ 8] = tmp09; - carry[ 9] = tmp10; - carry[10] = tmp11; - carry[11] = tmp12; - carry[12] = tmp13; - carry[13] = tmp14; - carry[14] = tmp15; - carry[15] = tmp16; - break; - } -} - __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** @@ -424,6 +49,8 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32 pc_offset = esalt_bufs[salt_pos].pc_offset; + const u32 pc_offset4 = pc_offset * 4; + u32 pc_digest[5]; pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0]; @@ -534,45 +161,46 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule * prepend salt -- can't stay outside the loop this time */ - u32x carry[16]; + w0[0] = swap32 (w0[0]); + w0[1] = swap32 (w0[1]); + w0[2] = swap32 (w0[2]); + w0[3] = swap32 (w0[3]); + w1[0] = swap32 (w1[0]); + w1[1] = swap32 (w1[1]); + w1[2] = swap32 (w1[2]); + w1[3] = swap32 (w1[3]); + w2[0] = swap32 (w2[0]); + w2[1] = swap32 (w2[1]); + w2[2] = swap32 (w2[2]); + w2[3] = swap32 (w2[3]); + w3[0] = swap32 (w3[0]); + w3[1] = swap32 (w3[1]); + w3[2] = swap32 (w3[2]); + w3[3] = swap32 (w3[3]); - carry[ 0] = swap32 (w0[0]); - carry[ 1] = swap32 (w0[1]); - carry[ 2] = swap32 (w0[2]); - carry[ 3] = swap32 (w0[3]); - carry[ 4] = swap32 (w1[0]); - carry[ 5] = swap32 (w1[1]); - carry[ 6] = swap32 (w1[2]); - carry[ 7] = swap32 (w1[3]); - carry[ 8] = swap32 (w2[0]); - carry[ 9] = swap32 (w2[1]); - carry[10] = swap32 (w2[2]); - carry[11] = swap32 (w2[3]); - carry[12] = swap32 (w3[0]); - carry[13] = swap32 (w3[1]); - carry[14] = swap32 (w3[2]); - carry[15] = swap32 (w3[3]); + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; - u32x w[16]; + switch_buffer_by_offset_carry_be (w0, w1, w2, w3, c0, c1, c2, c3, salt_len - pc_offset4); - w[ 0] = salt_buf0[0]; - w[ 1] = salt_buf0[1]; - w[ 2] = salt_buf0[2]; - w[ 3] = salt_buf0[3]; - w[ 4] = salt_buf1[0]; - w[ 5] = salt_buf1[1]; - w[ 6] = salt_buf1[2]; - w[ 7] = salt_buf1[3]; - w[ 8] = salt_buf2[0]; - w[ 9] = salt_buf2[1]; - w[10] = salt_buf2[2]; - w[11] = salt_buf2[3]; - w[12] = salt_buf3[0]; - w[13] = salt_buf3[1]; - w[14] = salt_buf3[2]; - w[15] = salt_buf3[3]; - - memcat64c_be (w, salt_len & 0x3f, carry); + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; u32x a = pc_digest[0]; u32x b = pc_digest[1]; @@ -580,24 +208,26 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x d = pc_digest[3]; u32x e = pc_digest[4]; - if (((salt_len & 0x3f) + pw_len2) >= 56) + const u32 r = (salt_len - pc_offset4) + pw_len2; + + if (r >= 56) { - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = w3[3]; #undef K #define K SHA1C00 @@ -701,42 +331,42 @@ __kernel void m13500_m04 (__global pw_t *pws, __global const kernel_rule_t *rule d += pc_digest[3]; e += pc_digest[4]; - w[ 0] = carry[ 0]; - w[ 1] = carry[ 1]; - w[ 2] = carry[ 2]; - w[ 3] = carry[ 3]; - w[ 4] = carry[ 4]; - w[ 5] = carry[ 5]; - w[ 6] = carry[ 6]; - w[ 7] = carry[ 7]; - w[ 8] = carry[ 8]; - w[ 9] = carry[ 9]; - w[10] = carry[10]; - w[11] = carry[11]; - w[12] = carry[12]; - w[13] = carry[13]; - w[14] = carry[14]; - w[15] = carry[15]; + w0[0] = c0[0]; + w0[1] = c0[1]; + w0[2] = c0[2]; + w0[3] = c0[3]; + w1[0] = c1[0]; + w1[1] = c1[1]; + w1[2] = c1[2]; + w1[3] = c1[3]; + w2[0] = c2[0]; + w2[1] = c2[1]; + w2[2] = c2[2]; + w2[3] = c2[3]; + w3[0] = c3[0]; + w3[1] = c3[1]; + w3[2] = c3[2]; + w3[3] = c3[3]; } /** * final sha1 */ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; u32x we_t = 0; u32x wf_t = pw_salt_len * 8; @@ -896,6 +526,8 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32 pc_offset = esalt_bufs[salt_pos].pc_offset; + const u32 pc_offset4 = pc_offset * 4; + u32 pc_digest[5]; pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0]; @@ -1018,45 +650,46 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule * prepend salt -- can't stay outside the loop this time */ - u32x carry[16]; + w0[0] = swap32 (w0[0]); + w0[1] = swap32 (w0[1]); + w0[2] = swap32 (w0[2]); + w0[3] = swap32 (w0[3]); + w1[0] = swap32 (w1[0]); + w1[1] = swap32 (w1[1]); + w1[2] = swap32 (w1[2]); + w1[3] = swap32 (w1[3]); + w2[0] = swap32 (w2[0]); + w2[1] = swap32 (w2[1]); + w2[2] = swap32 (w2[2]); + w2[3] = swap32 (w2[3]); + w3[0] = swap32 (w3[0]); + w3[1] = swap32 (w3[1]); + w3[2] = swap32 (w3[2]); + w3[3] = swap32 (w3[3]); - carry[ 0] = swap32 (w0[0]); - carry[ 1] = swap32 (w0[1]); - carry[ 2] = swap32 (w0[2]); - carry[ 3] = swap32 (w0[3]); - carry[ 4] = swap32 (w1[0]); - carry[ 5] = swap32 (w1[1]); - carry[ 6] = swap32 (w1[2]); - carry[ 7] = swap32 (w1[3]); - carry[ 8] = swap32 (w2[0]); - carry[ 9] = swap32 (w2[1]); - carry[10] = swap32 (w2[2]); - carry[11] = swap32 (w2[3]); - carry[12] = swap32 (w3[0]); - carry[13] = swap32 (w3[1]); - carry[14] = swap32 (w3[2]); - carry[15] = swap32 (w3[3]); + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; - u32x w[16]; + switch_buffer_by_offset_carry_be (w0, w1, w2, w3, c0, c1, c2, c3, salt_len - pc_offset4); - w[ 0] = salt_buf0[0]; - w[ 1] = salt_buf0[1]; - w[ 2] = salt_buf0[2]; - w[ 3] = salt_buf0[3]; - w[ 4] = salt_buf1[0]; - w[ 5] = salt_buf1[1]; - w[ 6] = salt_buf1[2]; - w[ 7] = salt_buf1[3]; - w[ 8] = salt_buf2[0]; - w[ 9] = salt_buf2[1]; - w[10] = salt_buf2[2]; - w[11] = salt_buf2[3]; - w[12] = salt_buf3[0]; - w[13] = salt_buf3[1]; - w[14] = salt_buf3[2]; - w[15] = salt_buf3[3]; - - memcat64c_be (w, salt_len & 0x3f, carry); + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; u32x a = pc_digest[0]; u32x b = pc_digest[1]; @@ -1064,24 +697,26 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x d = pc_digest[3]; u32x e = pc_digest[4]; - if (((salt_len & 0x3f) + pw_len2) >= 56) + const u32 r = (salt_len - pc_offset4) + pw_len2; + + if (r >= 56) { - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = w3[3]; #undef K #define K SHA1C00 @@ -1185,42 +820,42 @@ __kernel void m13500_s04 (__global pw_t *pws, __global const kernel_rule_t *rule d += pc_digest[3]; e += pc_digest[4]; - w[ 0] = carry[ 0]; - w[ 1] = carry[ 1]; - w[ 2] = carry[ 2]; - w[ 3] = carry[ 3]; - w[ 4] = carry[ 4]; - w[ 5] = carry[ 5]; - w[ 6] = carry[ 6]; - w[ 7] = carry[ 7]; - w[ 8] = carry[ 8]; - w[ 9] = carry[ 9]; - w[10] = carry[10]; - w[11] = carry[11]; - w[12] = carry[12]; - w[13] = carry[13]; - w[14] = carry[14]; - w[15] = carry[15]; + w0[0] = c0[0]; + w0[1] = c0[1]; + w0[2] = c0[2]; + w0[3] = c0[3]; + w1[0] = c1[0]; + w1[1] = c1[1]; + w1[2] = c1[2]; + w1[3] = c1[3]; + w2[0] = c2[0]; + w2[1] = c2[1]; + w2[2] = c2[2]; + w2[3] = c2[3]; + w3[0] = c3[0]; + w3[1] = c3[1]; + w3[2] = c3[2]; + w3[3] = c3[3]; } /** * final sha1 */ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; u32x we_t = 0; u32x wf_t = pw_salt_len * 8; diff --git a/OpenCL/m13500_a3.cl b/OpenCL/m13500_a3.cl index 5aafe198d..533cf4cc3 100644 --- a/OpenCL/m13500_a3.cl +++ b/OpenCL/m13500_a3.cl @@ -13,381 +13,6 @@ #include "inc_common.cl" #include "inc_simd.cl" -void memcat64c_be (u32x block[16], const u32 offset, u32x carry[16]) -{ - const u32 mod = offset & 3; - const u32 div = offset / 4; - - u32 tmp00; - u32 tmp01; - u32 tmp02; - u32 tmp03; - u32 tmp04; - u32 tmp05; - u32 tmp06; - u32 tmp07; - u32 tmp08; - u32 tmp09; - u32 tmp10; - u32 tmp11; - u32 tmp12; - u32 tmp13; - u32 tmp14; - u32 tmp15; - u32 tmp16; - - #ifdef IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - - tmp00 = __byte_perm (carry[ 0], 0, selector); - tmp01 = __byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = __byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = __byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = __byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = __byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = __byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = __byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = __byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = __byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = __byte_perm (carry[10], carry[ 9], selector); - tmp11 = __byte_perm (carry[11], carry[10], selector); - tmp12 = __byte_perm (carry[12], carry[11], selector); - tmp13 = __byte_perm (carry[13], carry[12], selector); - tmp14 = __byte_perm (carry[14], carry[13], selector); - tmp15 = __byte_perm (carry[15], carry[14], selector); - tmp16 = __byte_perm ( 0, carry[15], selector); - #endif - - #if defined IS_AMD || defined IS_GENERIC - tmp00 = amd_bytealign ( 0, carry[ 0], offset); - tmp01 = amd_bytealign (carry[ 0], carry[ 1], offset); - tmp02 = amd_bytealign (carry[ 1], carry[ 2], offset); - tmp03 = amd_bytealign (carry[ 2], carry[ 3], offset); - tmp04 = amd_bytealign (carry[ 3], carry[ 4], offset); - tmp05 = amd_bytealign (carry[ 4], carry[ 5], offset); - tmp06 = amd_bytealign (carry[ 5], carry[ 6], offset); - tmp07 = amd_bytealign (carry[ 6], carry[ 7], offset); - tmp08 = amd_bytealign (carry[ 7], carry[ 8], offset); - tmp09 = amd_bytealign (carry[ 8], carry[ 9], offset); - tmp10 = amd_bytealign (carry[ 9], carry[10], offset); - tmp11 = amd_bytealign (carry[10], carry[11], offset); - tmp12 = amd_bytealign (carry[11], carry[12], offset); - tmp13 = amd_bytealign (carry[12], carry[13], offset); - tmp14 = amd_bytealign (carry[13], carry[14], offset); - tmp15 = amd_bytealign (carry[14], carry[15], offset); - tmp16 = amd_bytealign (carry[15], 0, offset); - #endif - - carry[ 0] = 0; - carry[ 1] = 0; - carry[ 2] = 0; - carry[ 3] = 0; - carry[ 4] = 0; - carry[ 5] = 0; - carry[ 6] = 0; - carry[ 7] = 0; - carry[ 8] = 0; - carry[ 9] = 0; - carry[10] = 0; - carry[11] = 0; - carry[12] = 0; - carry[13] = 0; - carry[14] = 0; - carry[15] = 0; - - switch (div) - { - case 0: block[ 0] |= tmp00; - block[ 1] = tmp01; - block[ 2] = tmp02; - block[ 3] = tmp03; - block[ 4] = tmp04; - block[ 5] = tmp05; - block[ 6] = tmp06; - block[ 7] = tmp07; - block[ 8] = tmp08; - block[ 9] = tmp09; - block[10] = tmp10; - block[11] = tmp11; - block[12] = tmp12; - block[13] = tmp13; - block[14] = tmp14; - block[15] = tmp15; - carry[ 0] = tmp16; - break; - case 1: block[ 1] |= tmp00; - block[ 2] = tmp01; - block[ 3] = tmp02; - block[ 4] = tmp03; - block[ 5] = tmp04; - block[ 6] = tmp05; - block[ 7] = tmp06; - block[ 8] = tmp07; - block[ 9] = tmp08; - block[10] = tmp09; - block[11] = tmp10; - block[12] = tmp11; - block[13] = tmp12; - block[14] = tmp13; - block[15] = tmp14; - carry[ 0] = tmp15; - carry[ 1] = tmp16; - break; - case 2: block[ 2] |= tmp00; - block[ 3] = tmp01; - block[ 4] = tmp02; - block[ 5] = tmp03; - block[ 6] = tmp04; - block[ 7] = tmp05; - block[ 8] = tmp06; - block[ 9] = tmp07; - block[10] = tmp08; - block[11] = tmp09; - block[12] = tmp10; - block[13] = tmp11; - block[14] = tmp12; - block[15] = tmp13; - carry[ 0] = tmp14; - carry[ 1] = tmp15; - carry[ 2] = tmp16; - break; - case 3: block[ 3] |= tmp00; - block[ 4] = tmp01; - block[ 5] = tmp02; - block[ 6] = tmp03; - block[ 7] = tmp04; - block[ 8] = tmp05; - block[ 9] = tmp06; - block[10] = tmp07; - block[11] = tmp08; - block[12] = tmp09; - block[13] = tmp10; - block[14] = tmp11; - block[15] = tmp12; - carry[ 0] = tmp13; - carry[ 1] = tmp14; - carry[ 2] = tmp15; - carry[ 3] = tmp16; - break; - case 4: block[ 4] |= tmp00; - block[ 5] = tmp01; - block[ 6] = tmp02; - block[ 7] = tmp03; - block[ 8] = tmp04; - block[ 9] = tmp05; - block[10] = tmp06; - block[11] = tmp07; - block[12] = tmp08; - block[13] = tmp09; - block[14] = tmp10; - block[15] = tmp11; - carry[ 0] = tmp12; - carry[ 1] = tmp13; - carry[ 2] = tmp14; - carry[ 3] = tmp15; - carry[ 4] = tmp16; - break; - case 5: block[ 5] |= tmp00; - block[ 6] = tmp01; - block[ 7] = tmp02; - block[ 8] = tmp03; - block[ 9] = tmp04; - block[10] = tmp05; - block[11] = tmp06; - block[12] = tmp07; - block[13] = tmp08; - block[14] = tmp09; - block[15] = tmp10; - carry[ 0] = tmp11; - carry[ 1] = tmp12; - carry[ 2] = tmp13; - carry[ 3] = tmp14; - carry[ 4] = tmp15; - carry[ 5] = tmp16; - break; - case 6: block[ 6] |= tmp00; - block[ 7] = tmp01; - block[ 8] = tmp02; - block[ 9] = tmp03; - block[10] = tmp04; - block[11] = tmp05; - block[12] = tmp06; - block[13] = tmp07; - block[14] = tmp08; - block[15] = tmp09; - carry[ 0] = tmp10; - carry[ 1] = tmp11; - carry[ 2] = tmp12; - carry[ 3] = tmp13; - carry[ 4] = tmp14; - carry[ 5] = tmp15; - carry[ 6] = tmp16; - break; - case 7: block[ 7] |= tmp00; - block[ 8] = tmp01; - block[ 9] = tmp02; - block[10] = tmp03; - block[11] = tmp04; - block[12] = tmp05; - block[13] = tmp06; - block[14] = tmp07; - block[15] = tmp08; - carry[ 0] = tmp09; - carry[ 1] = tmp10; - carry[ 2] = tmp11; - carry[ 3] = tmp12; - carry[ 4] = tmp13; - carry[ 5] = tmp14; - carry[ 6] = tmp15; - carry[ 7] = tmp16; - break; - case 8: block[ 8] |= tmp00; - block[ 9] = tmp01; - block[10] = tmp02; - block[11] = tmp03; - block[12] = tmp04; - block[13] = tmp05; - block[14] = tmp06; - block[15] = tmp07; - carry[ 0] = tmp08; - carry[ 1] = tmp09; - carry[ 2] = tmp10; - carry[ 3] = tmp11; - carry[ 4] = tmp12; - carry[ 5] = tmp13; - carry[ 6] = tmp14; - carry[ 7] = tmp15; - carry[ 8] = tmp16; - break; - case 9: block[ 9] |= tmp00; - block[10] = tmp01; - block[11] = tmp02; - block[12] = tmp03; - block[13] = tmp04; - block[14] = tmp05; - block[15] = tmp06; - carry[ 0] = tmp07; - carry[ 1] = tmp08; - carry[ 2] = tmp09; - carry[ 3] = tmp10; - carry[ 4] = tmp11; - carry[ 5] = tmp12; - carry[ 6] = tmp13; - carry[ 7] = tmp14; - carry[ 8] = tmp15; - carry[ 9] = tmp16; - break; - case 10: block[10] |= tmp00; - block[11] = tmp01; - block[12] = tmp02; - block[13] = tmp03; - block[14] = tmp04; - block[15] = tmp05; - carry[ 0] = tmp06; - carry[ 1] = tmp07; - carry[ 2] = tmp08; - carry[ 3] = tmp09; - carry[ 4] = tmp10; - carry[ 5] = tmp11; - carry[ 6] = tmp12; - carry[ 7] = tmp13; - carry[ 8] = tmp14; - carry[ 9] = tmp15; - carry[10] = tmp16; - break; - case 11: block[11] |= tmp00; - block[12] = tmp01; - block[13] = tmp02; - block[14] = tmp03; - block[15] = tmp04; - carry[ 0] = tmp05; - carry[ 1] = tmp06; - carry[ 2] = tmp07; - carry[ 3] = tmp08; - carry[ 4] = tmp09; - carry[ 5] = tmp10; - carry[ 6] = tmp11; - carry[ 7] = tmp12; - carry[ 8] = tmp13; - carry[ 9] = tmp14; - carry[10] = tmp15; - carry[11] = tmp16; - break; - case 12: block[12] |= tmp00; - block[13] = tmp01; - block[14] = tmp02; - block[15] = tmp03; - carry[ 0] = tmp04; - carry[ 1] = tmp05; - carry[ 2] = tmp06; - carry[ 3] = tmp07; - carry[ 4] = tmp08; - carry[ 5] = tmp09; - carry[ 6] = tmp10; - carry[ 7] = tmp11; - carry[ 8] = tmp12; - carry[ 9] = tmp13; - carry[10] = tmp14; - carry[11] = tmp15; - carry[12] = tmp16; - break; - case 13: block[13] |= tmp00; - block[14] = tmp01; - block[15] = tmp02; - carry[ 0] = tmp03; - carry[ 1] = tmp04; - carry[ 2] = tmp05; - carry[ 3] = tmp06; - carry[ 4] = tmp07; - carry[ 5] = tmp08; - carry[ 6] = tmp09; - carry[ 7] = tmp10; - carry[ 8] = tmp11; - carry[ 9] = tmp12; - carry[10] = tmp13; - carry[11] = tmp14; - carry[12] = tmp15; - carry[13] = tmp16; - break; - case 14: block[14] |= tmp00; - block[15] = tmp01; - carry[ 0] = tmp02; - carry[ 1] = tmp03; - carry[ 2] = tmp04; - carry[ 3] = tmp05; - carry[ 4] = tmp06; - carry[ 5] = tmp07; - carry[ 6] = tmp08; - carry[ 7] = tmp09; - carry[ 8] = tmp10; - carry[ 9] = tmp11; - carry[10] = tmp12; - carry[11] = tmp13; - carry[12] = tmp14; - carry[13] = tmp15; - carry[14] = tmp16; - break; - case 15: block[15] |= tmp00; - carry[ 0] = tmp01; - carry[ 1] = tmp02; - carry[ 2] = tmp03; - carry[ 3] = tmp04; - carry[ 4] = tmp05; - carry[ 5] = tmp06; - carry[ 6] = tmp07; - carry[ 7] = tmp08; - carry[ 8] = tmp09; - carry[ 9] = tmp10; - carry[10] = tmp11; - carry[11] = tmp12; - carry[12] = tmp13; - carry[13] = tmp14; - carry[14] = tmp15; - carry[15] = tmp16; - break; - } -} - void m13500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global pstoken_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** @@ -403,6 +28,8 @@ void m13500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl const u32 pc_offset = esalt_bufs[salt_pos].pc_offset; + const u32 pc_offset4 = pc_offset * 4; + u32 pc_digest[5]; pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0]; @@ -453,45 +80,51 @@ void m13500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl * prepend salt -- can't stay outside the loop this time */ - u32x carry[16]; + u32x w0_t2[4]; + u32x w1_t2[4]; + u32x w2_t2[4]; + u32x w3_t2[4]; - carry[ 0] = w0lr; - carry[ 1] = w0[1]; - carry[ 2] = w0[2]; - carry[ 3] = w0[3]; - carry[ 4] = w1[0]; - carry[ 5] = w1[1]; - carry[ 6] = w1[2]; - carry[ 7] = w1[3]; - carry[ 8] = w2[0]; - carry[ 9] = w2[1]; - carry[10] = w2[2]; - carry[11] = w2[3]; - carry[12] = w3[0]; - carry[13] = w3[1]; - carry[14] = w3[2]; - carry[15] = w3[3]; + w0_t2[0] = w0lr; + w0_t2[1] = w0[1]; + w0_t2[2] = w0[2]; + w0_t2[3] = w0[3]; + w1_t2[0] = w1[0]; + w1_t2[1] = w1[1]; + w1_t2[2] = w1[2]; + w1_t2[3] = w1[3]; + w2_t2[0] = w2[0]; + w2_t2[1] = w2[1]; + w2_t2[2] = w2[2]; + w2_t2[3] = w2[3]; + w3_t2[0] = w3[0]; + w3_t2[1] = w3[1]; + w3_t2[2] = w3[2]; + w3_t2[3] = w3[3]; - u32x w[16]; + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; - w[ 0] = salt_buf0[0]; - w[ 1] = salt_buf0[1]; - w[ 2] = salt_buf0[2]; - w[ 3] = salt_buf0[3]; - w[ 4] = salt_buf1[0]; - w[ 5] = salt_buf1[1]; - w[ 6] = salt_buf1[2]; - w[ 7] = salt_buf1[3]; - w[ 8] = salt_buf2[0]; - w[ 9] = salt_buf2[1]; - w[10] = salt_buf2[2]; - w[11] = salt_buf2[3]; - w[12] = salt_buf3[0]; - w[13] = salt_buf3[1]; - w[14] = salt_buf3[2]; - w[15] = salt_buf3[3]; + switch_buffer_by_offset_carry_be (w0_t2, w1_t2, w2_t2, w3_t2, c0, c1, c2, c3, salt_len - pc_offset4); - memcat64c_be (w, salt_len & 0x3f, carry); + w0_t2[0] |= salt_buf0[0]; + w0_t2[1] |= salt_buf0[1]; + w0_t2[2] |= salt_buf0[2]; + w0_t2[3] |= salt_buf0[3]; + w1_t2[0] |= salt_buf1[0]; + w1_t2[1] |= salt_buf1[1]; + w1_t2[2] |= salt_buf1[2]; + w1_t2[3] |= salt_buf1[3]; + w2_t2[0] |= salt_buf2[0]; + w2_t2[1] |= salt_buf2[1]; + w2_t2[2] |= salt_buf2[2]; + w2_t2[3] |= salt_buf2[3]; + w3_t2[0] |= salt_buf3[0]; + w3_t2[1] |= salt_buf3[1]; + w3_t2[2] |= salt_buf3[2]; + w3_t2[3] |= salt_buf3[3]; u32x a = pc_digest[0]; u32x b = pc_digest[1]; @@ -499,24 +132,26 @@ void m13500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl u32x d = pc_digest[3]; u32x e = pc_digest[4]; - if (((salt_len & 0x3f) + pw_len) >= 56) + const u32 r = (salt_len - pc_offset4) + pw_len; + + if (r >= 56) { - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32x w0_t = w0_t2[0]; + u32x w1_t = w0_t2[1]; + u32x w2_t = w0_t2[2]; + u32x w3_t = w0_t2[3]; + u32x w4_t = w1_t2[0]; + u32x w5_t = w1_t2[1]; + u32x w6_t = w1_t2[2]; + u32x w7_t = w1_t2[3]; + u32x w8_t = w2_t2[0]; + u32x w9_t = w2_t2[1]; + u32x wa_t = w2_t2[2]; + u32x wb_t = w2_t2[3]; + u32x wc_t = w3_t2[0]; + u32x wd_t = w3_t2[1]; + u32x we_t = w3_t2[2]; + u32x wf_t = w3_t2[3]; #undef K #define K SHA1C00 @@ -620,42 +255,42 @@ void m13500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl d += pc_digest[3]; e += pc_digest[4]; - w[ 0] = carry[ 0]; - w[ 1] = carry[ 1]; - w[ 2] = carry[ 2]; - w[ 3] = carry[ 3]; - w[ 4] = carry[ 4]; - w[ 5] = carry[ 5]; - w[ 6] = carry[ 6]; - w[ 7] = carry[ 7]; - w[ 8] = carry[ 8]; - w[ 9] = carry[ 9]; - w[10] = carry[10]; - w[11] = carry[11]; - w[12] = carry[12]; - w[13] = carry[13]; - w[14] = carry[14]; - w[15] = carry[15]; + w0_t2[0] = c0[0]; + w0_t2[1] = c0[1]; + w0_t2[2] = c0[2]; + w0_t2[3] = c0[3]; + w1_t2[0] = c1[0]; + w1_t2[1] = c1[1]; + w1_t2[2] = c1[2]; + w1_t2[3] = c1[3]; + w2_t2[0] = c2[0]; + w2_t2[1] = c2[1]; + w2_t2[2] = c2[2]; + w2_t2[3] = c2[3]; + w3_t2[0] = c3[0]; + w3_t2[1] = c3[1]; + w3_t2[2] = c3[2]; + w3_t2[3] = c3[3]; } /** * final sha1 */ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; + u32x w0_t = w0_t2[0]; + u32x w1_t = w0_t2[1]; + u32x w2_t = w0_t2[2]; + u32x w3_t = w0_t2[3]; + u32x w4_t = w1_t2[0]; + u32x w5_t = w1_t2[1]; + u32x w6_t = w1_t2[2]; + u32x w7_t = w1_t2[3]; + u32x w8_t = w2_t2[0]; + u32x w9_t = w2_t2[1]; + u32x wa_t = w2_t2[2]; + u32x wb_t = w2_t2[3]; + u32x wc_t = w3_t2[0]; + u32x wd_t = w3_t2[1]; u32x we_t = 0; u32x wf_t = pw_salt_len * 8; @@ -786,6 +421,8 @@ void m13500s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl const u32 pc_offset = esalt_bufs[salt_pos].pc_offset; + const u32 pc_offset4 = pc_offset * 4; + u32 pc_digest[5]; pc_digest[0] = esalt_bufs[salt_pos].pc_digest[0]; @@ -848,45 +485,51 @@ void m13500s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl * prepend salt -- can't stay outside the loop this time */ - u32x carry[16]; + u32x w0_t2[4]; + u32x w1_t2[4]; + u32x w2_t2[4]; + u32x w3_t2[4]; - carry[ 0] = w0lr; - carry[ 1] = w0[1]; - carry[ 2] = w0[2]; - carry[ 3] = w0[3]; - carry[ 4] = w1[0]; - carry[ 5] = w1[1]; - carry[ 6] = w1[2]; - carry[ 7] = w1[3]; - carry[ 8] = w2[0]; - carry[ 9] = w2[1]; - carry[10] = w2[2]; - carry[11] = w2[3]; - carry[12] = w3[0]; - carry[13] = w3[1]; - carry[14] = w3[2]; - carry[15] = w3[3]; + w0_t2[0] = w0lr; + w0_t2[1] = w0[1]; + w0_t2[2] = w0[2]; + w0_t2[3] = w0[3]; + w1_t2[0] = w1[0]; + w1_t2[1] = w1[1]; + w1_t2[2] = w1[2]; + w1_t2[3] = w1[3]; + w2_t2[0] = w2[0]; + w2_t2[1] = w2[1]; + w2_t2[2] = w2[2]; + w2_t2[3] = w2[3]; + w3_t2[0] = w3[0]; + w3_t2[1] = w3[1]; + w3_t2[2] = w3[2]; + w3_t2[3] = w3[3]; - u32x w[16]; + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; - w[ 0] = salt_buf0[0]; - w[ 1] = salt_buf0[1]; - w[ 2] = salt_buf0[2]; - w[ 3] = salt_buf0[3]; - w[ 4] = salt_buf1[0]; - w[ 5] = salt_buf1[1]; - w[ 6] = salt_buf1[2]; - w[ 7] = salt_buf1[3]; - w[ 8] = salt_buf2[0]; - w[ 9] = salt_buf2[1]; - w[10] = salt_buf2[2]; - w[11] = salt_buf2[3]; - w[12] = salt_buf3[0]; - w[13] = salt_buf3[1]; - w[14] = salt_buf3[2]; - w[15] = salt_buf3[3]; + switch_buffer_by_offset_carry_be (w0_t2, w1_t2, w2_t2, w3_t2, c0, c1, c2, c3, salt_len - pc_offset4); - memcat64c_be (w, salt_len & 0x3f, carry); + w0_t2[0] |= salt_buf0[0]; + w0_t2[1] |= salt_buf0[1]; + w0_t2[2] |= salt_buf0[2]; + w0_t2[3] |= salt_buf0[3]; + w1_t2[0] |= salt_buf1[0]; + w1_t2[1] |= salt_buf1[1]; + w1_t2[2] |= salt_buf1[2]; + w1_t2[3] |= salt_buf1[3]; + w2_t2[0] |= salt_buf2[0]; + w2_t2[1] |= salt_buf2[1]; + w2_t2[2] |= salt_buf2[2]; + w2_t2[3] |= salt_buf2[3]; + w3_t2[0] |= salt_buf3[0]; + w3_t2[1] |= salt_buf3[1]; + w3_t2[2] |= salt_buf3[2]; + w3_t2[3] |= salt_buf3[3]; u32x a = pc_digest[0]; u32x b = pc_digest[1]; @@ -894,24 +537,26 @@ void m13500s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl u32x d = pc_digest[3]; u32x e = pc_digest[4]; - if (((salt_len & 0x3f) + pw_len) >= 56) + const u32 r = (salt_len - pc_offset4) + pw_len; + + if (r >= 56) { - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32x w0_t = w0_t2[0]; + u32x w1_t = w0_t2[1]; + u32x w2_t = w0_t2[2]; + u32x w3_t = w0_t2[3]; + u32x w4_t = w1_t2[0]; + u32x w5_t = w1_t2[1]; + u32x w6_t = w1_t2[2]; + u32x w7_t = w1_t2[3]; + u32x w8_t = w2_t2[0]; + u32x w9_t = w2_t2[1]; + u32x wa_t = w2_t2[2]; + u32x wb_t = w2_t2[3]; + u32x wc_t = w3_t2[0]; + u32x wd_t = w3_t2[1]; + u32x we_t = w3_t2[2]; + u32x wf_t = w3_t2[3]; #undef K #define K SHA1C00 @@ -1015,42 +660,42 @@ void m13500s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl d += pc_digest[3]; e += pc_digest[4]; - w[ 0] = carry[ 0]; - w[ 1] = carry[ 1]; - w[ 2] = carry[ 2]; - w[ 3] = carry[ 3]; - w[ 4] = carry[ 4]; - w[ 5] = carry[ 5]; - w[ 6] = carry[ 6]; - w[ 7] = carry[ 7]; - w[ 8] = carry[ 8]; - w[ 9] = carry[ 9]; - w[10] = carry[10]; - w[11] = carry[11]; - w[12] = carry[12]; - w[13] = carry[13]; - w[14] = carry[14]; - w[15] = carry[15]; + w0_t2[0] = c0[0]; + w0_t2[1] = c0[1]; + w0_t2[2] = c0[2]; + w0_t2[3] = c0[3]; + w1_t2[0] = c1[0]; + w1_t2[1] = c1[1]; + w1_t2[2] = c1[2]; + w1_t2[3] = c1[3]; + w2_t2[0] = c2[0]; + w2_t2[1] = c2[1]; + w2_t2[2] = c2[2]; + w2_t2[3] = c2[3]; + w3_t2[0] = c3[0]; + w3_t2[1] = c3[1]; + w3_t2[2] = c3[2]; + w3_t2[3] = c3[3]; } /** * final sha1 */ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; + u32x w0_t = w0_t2[0]; + u32x w1_t = w0_t2[1]; + u32x w2_t = w0_t2[2]; + u32x w3_t = w0_t2[3]; + u32x w4_t = w1_t2[0]; + u32x w5_t = w1_t2[1]; + u32x w6_t = w1_t2[2]; + u32x w7_t = w1_t2[3]; + u32x w8_t = w2_t2[0]; + u32x w9_t = w2_t2[1]; + u32x wa_t = w2_t2[2]; + u32x wb_t = w2_t2[3]; + u32x wc_t = w3_t2[0]; + u32x wd_t = w3_t2[1]; u32x we_t = 0; u32x wf_t = pw_salt_len * 8; diff --git a/docs/changes.txt b/docs/changes.txt index ca7b9298c..3b6145795 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -53,6 +53,7 @@ - Fixed pointer to local outside scope in case -j or -k is used - Fixed pointer to local outside scope in case --markov-hcstat is not used - Fixed a problem within the Kerberos 5 TGS-REP (-m 13100) hash parser +- Fixed cracking of PeopleSoft Token if salt length + password length is >= 128 byte ## ## Technical diff --git a/src/cpu_sha1.c b/src/cpu_sha1.c index 9ba0fb6a8..5f7f3612d 100644 --- a/src/cpu_sha1.c +++ b/src/cpu_sha1.c @@ -14,11 +14,11 @@ void sha1_64 (u32 block[16], u32 digest[5]) { - u32 a = SHA1M_A; - u32 b = SHA1M_B; - u32 c = SHA1M_C; - u32 d = SHA1M_D; - u32 e = SHA1M_E; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; u32 w0_t = block[ 0]; u32 w1_t = block[ 1]; diff --git a/src/interface.c b/src/interface.c index 40c3e550c..30e024a77 100644 --- a/src/interface.c +++ b/src/interface.c @@ -21667,7 +21667,6 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx) hashconfig->parse_func = pstoken_parse_hash; hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT - | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_PREPENDED_SALT | OPTI_TYPE_RAW_HASH;