1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-26 09:58:16 +00:00

Cleanup of all raw-MD5 based algorithms; Should be done for all raw-SHA1, -SHA256 and -SHA512 based algorithms as well

This commit is contained in:
Jens Steube 2016-02-24 11:35:13 +01:00
parent 531f72c905
commit 7e9fee2155
19 changed files with 1118 additions and 1364 deletions

View File

@ -39,18 +39,16 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -70,6 +68,7 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
append_0x80_2x4 (w0, w1, out_len); append_0x80_2x4 (w0, w1, out_len);
w3[2] = out_len * 8; w3[2] = out_len * 8;
w3[3] = 0;
u32x a = MD5M_A; u32x a = MD5M_A;
u32x b = MD5M_B; u32x b = MD5M_B;
@ -173,18 +172,16 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -216,6 +213,7 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
append_0x80_2x4 (w0, w1, out_len); append_0x80_2x4 (w0, w1, out_len);
w3[2] = out_len * 8; w3[2] = out_len * 8;
w3[3] = 0;
u32x a = MD5M_A; u32x a = MD5M_A;
u32x b = MD5M_B; u32x b = MD5M_B;

View File

@ -36,17 +36,17 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -69,14 +69,14 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -228,17 +228,17 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -273,14 +273,14 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };

View File

@ -39,18 +39,16 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -99,10 +97,10 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* append salt * append salt
*/ */
u32x s0[4] = { 0 }; u32x s0[4];
u32x s1[4] = { 0 }; u32x s1[4];
u32x s2[4] = { 0 }; u32x s2[4];
u32x s3[4] = { 0 }; u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];
@ -248,18 +246,16 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -320,10 +316,10 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* append salt * append salt
*/ */
u32x s0[4] = { 0 }; u32x s0[4];
u32x s1[4] = { 0 }; u32x s1[4];
u32x s2[4] = { 0 }; u32x s2[4];
u32x s3[4] = { 0 }; u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];

View File

@ -36,17 +36,17 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -97,14 +97,14 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -155,10 +155,10 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* append salt * append salt
*/ */
u32x s0[4] = { 0 }; u32x s0[4];
u32x s1[4] = { 0 }; u32x s1[4];
u32x s2[4] = { 0 }; u32x s2[4];
u32x s3[4] = { 0 }; u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];
@ -303,17 +303,17 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -376,14 +376,14 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -434,10 +434,10 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* append salt * append salt
*/ */
u32x s0[4] = { 0 }; u32x s0[4];
u32x s1[4] = { 0 }; u32x s1[4];
u32x s2[4] = { 0 }; u32x s2[4];
u32x s3[4] = { 0 }; u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];

View File

@ -74,6 +74,8 @@ static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
const u32 salt_len = salt_bufs[salt_pos].salt_len; const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32 pw_salt_len = pw_len + salt_len;
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= salt_buf0[0]; w[ 0] |= salt_buf0[0];
@ -90,13 +92,8 @@ static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w[11] |= salt_buf2[3]; w[11] |= salt_buf2[3];
w[12] |= salt_buf3[0]; w[12] |= salt_buf3[0];
w[13] |= salt_buf3[1]; w[13] |= salt_buf3[1];
w[14] |= salt_buf3[2]; w[14] = pw_salt_len * 8;
w[15] |= salt_buf3[3]; w[15] = 0;
const u32 pw_salt_len = pw_len + salt_len;
w[14] = pw_salt_len * 8;
w[15] = 0;
/** /**
* base * base
@ -509,7 +506,7 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w[11] = 0; w[11] = 0;
w[12] = 0; w[12] = 0;
w[13] = 0; w[13] = 0;
w[14] = pws[gid].i[14]; w[14] = 0;
w[15] = 0; w[15] = 0;
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -547,7 +544,7 @@ __kernel void m00010_m08 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w[11] = 0; w[11] = 0;
w[12] = 0; w[12] = 0;
w[13] = 0; w[13] = 0;
w[14] = pws[gid].i[14]; w[14] = 0;
w[15] = 0; w[15] = 0;
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;

View File

@ -39,18 +39,16 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -101,51 +99,26 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_salt_len = out_len + salt_len; const u32 out_salt_len = out_len + salt_len;
u32x w0_t[4]; switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0[0]; w0[0] |= salt_buf0[0];
w0_t[1] = w0[1]; w0[1] |= salt_buf0[1];
w0_t[2] = w0[2]; w0[2] |= salt_buf0[2];
w0_t[3] = w0[3]; w0[3] |= salt_buf0[3];
w1_t[0] = w1[0]; w1[0] |= salt_buf1[0];
w1_t[1] = w1[1]; w1[1] |= salt_buf1[1];
w1_t[2] = w1[2]; w1[2] |= salt_buf1[2];
w1_t[3] = w1[3]; w1[3] |= salt_buf1[3];
w2_t[0] = w2[0]; w2[0] |= salt_buf2[0];
w2_t[1] = w2[1]; w2[1] |= salt_buf2[1];
w2_t[2] = w2[2]; w2[2] |= salt_buf2[2];
w2_t[3] = w2[3]; w2[3] |= salt_buf2[3];
w3_t[0] = w3[0]; w3[0] |= salt_buf3[0];
w3_t[1] = w3[1]; w3[1] |= salt_buf3[1];
w3_t[2] = w3[2]; w3[2] = out_salt_len * 8;
w3_t[3] = w3[3]; w3[3] = 0;
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len); append_0x80_4x4 (w0, w1, w2, w3, out_salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
w3_t[2] = out_salt_len * 8;
w3_t[3] = 0;
/** /**
* md5 * md5
@ -156,73 +129,73 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x c = MD5M_C; u32x c = MD5M_C;
u32x d = MD5M_D; u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
COMPARE_M_SIMD (a, d, c, b); COMPARE_M_SIMD (a, d, c, b);
} }
@ -253,18 +226,16 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -327,51 +298,26 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_salt_len = out_len + salt_len; const u32 out_salt_len = out_len + salt_len;
u32x w0_t[4]; switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0[0]; w0[0] |= salt_buf0[0];
w0_t[1] = w0[1]; w0[1] |= salt_buf0[1];
w0_t[2] = w0[2]; w0[2] |= salt_buf0[2];
w0_t[3] = w0[3]; w0[3] |= salt_buf0[3];
w1_t[0] = w1[0]; w1[0] |= salt_buf1[0];
w1_t[1] = w1[1]; w1[1] |= salt_buf1[1];
w1_t[2] = w1[2]; w1[2] |= salt_buf1[2];
w1_t[3] = w1[3]; w1[3] |= salt_buf1[3];
w2_t[0] = w2[0]; w2[0] |= salt_buf2[0];
w2_t[1] = w2[1]; w2[1] |= salt_buf2[1];
w2_t[2] = w2[2]; w2[2] |= salt_buf2[2];
w2_t[3] = w2[3]; w2[3] |= salt_buf2[3];
w3_t[0] = w3[0]; w3[0] |= salt_buf3[0];
w3_t[1] = w3[1]; w3[1] |= salt_buf3[1];
w3_t[2] = w3[2]; w3[2] = out_salt_len * 8;
w3_t[3] = w3[3]; w3[3] = 0;
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len); append_0x80_4x4 (w0, w1, w2, w3, out_salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
w3_t[2] = out_salt_len * 8;
w3_t[3] = 0;
/** /**
* md5 * md5
@ -382,76 +328,76 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x c = MD5M_C; u32x c = MD5M_C;
u32x d = MD5M_D; u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
if (MATCHES_NONE_VS (a, search[0])) continue; if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
COMPARE_S_SIMD (a, d, c, b); COMPARE_S_SIMD (a, d, c, b);
} }

View File

@ -36,17 +36,17 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -97,14 +97,14 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -148,8 +148,8 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w2[3] = wordl2[3] | wordr2[3]; w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0]; w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1]; w3[1] = wordl3[1] | wordr3[1];
w3[2] = 0; w3[2] = wordl3[2] | wordr3[2];
w3[3] = 0; w3[3] = wordl3[3] | wordr3[3];
/** /**
* prepend salt * prepend salt
@ -281,17 +281,17 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -354,14 +354,14 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -405,8 +405,8 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w2[3] = wordl2[3] | wordr2[3]; w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0]; w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1]; w3[1] = wordl3[1] | wordr3[1];
w3[2] = 0; w3[2] = wordl3[2] | wordr3[2];
w3[3] = 0; w3[3] = wordl3[3] | wordr3[3];
/** /**
* prepend salt * prepend salt

View File

@ -81,8 +81,8 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2[3] |= salt_buf2[3]; w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0]; w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1]; w3[1] |= salt_buf3[1];
w3[2] |= salt_buf3[2]; w3[2] = pw_salt_len * 8;
w3[3] |= salt_buf3[3]; w3[3] = 0;
/** /**
* loop * loop
@ -113,8 +113,8 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] = w2[3]; w2_t[3] = w2[3];
w3_t[0] = w3[0]; w3_t[0] = w3[0];
w3_t[1] = w3[1]; w3_t[1] = w3[1];
w3_t[2] = pw_salt_len * 8; w3_t[2] = w3[2];
w3_t[3] = 0; w3_t[3] = w3[3];
overwrite_at_le_4x4 (w0_t, w1_t, w2_t, w3_t, w0lr, salt_len); overwrite_at_le_4x4 (w0_t, w1_t, w2_t, w3_t, w0lr, salt_len);
@ -272,8 +272,8 @@ static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2[3] |= salt_buf2[3]; w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0]; w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1]; w3[1] |= salt_buf3[1];
w3[2] |= salt_buf3[2]; w3[2] = pw_salt_len * 8;
w3[3] |= salt_buf3[3]; w3[3] = 0;
/** /**
* loop * loop
@ -304,8 +304,8 @@ static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
w2_t[3] = w2[3]; w2_t[3] = w2[3];
w3_t[0] = w3[0]; w3_t[0] = w3[0];
w3_t[1] = w3[1]; w3_t[1] = w3[1];
w3_t[2] = pw_salt_len * 8; w3_t[2] = w3[2];
w3_t[3] = 0; w3_t[3] = w3[3];
overwrite_at_le_4x4 (w0_t, w1_t, w2_t, w3_t, w0lr, salt_len); overwrite_at_le_4x4 (w0_t, w1_t, w2_t, w3_t, w0lr, salt_len);

View File

@ -41,18 +41,16 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -97,66 +95,57 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1);
const u32 out_len2 = out_len * 2;
/** /**
* append salt * append salt
*/ */
u32x s0[4]; u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2]; s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3]; s0[3] = salt_buf0[3];
u32x s1[4];
s1[0] = salt_buf1[0]; s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1]; s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2]; s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3]; s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
u32x s2[4]; switch_buffer_by_offset_le (s0, s1, s2, s3, out_len2);
s2[0] = 0; const u32 pw_salt_len = out_len2 + salt_len;
s2[1] = 0;
s2[2] = 0;
s2[3] = 0;
u32x s3[4]; w0[0] |= s0[0];
w0[1] |= s0[1];
s3[0] = 0; w0[2] |= s0[2];
s3[1] = 0; w0[3] |= s0[3];
s3[2] = 0; w1[0] |= s1[0];
s3[3] = 0; w1[1] |= s1[1];
w1[2] |= s1[2];
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2)); w1[3] |= s1[3];
w2[0] |= s2[0];
const u32 out_salt_len = (out_len * 2) + salt_len; w2[1] |= s2[1];
w2[2] |= s2[2];
u32x w0_t[4]; w2[3] |= s2[3];
u32x w1_t[4]; w3[0] |= s3[0];
u32x w2_t[4]; w3[1] |= s3[1];
u32x w3_t[4]; w3[2] = pw_salt_len * 8;
w3[3] = 0;
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
w0_t[0] |= s0[0];
w0_t[1] |= s0[1];
w0_t[2] |= s0[2];
w0_t[3] |= s0[3];
w1_t[0] |= s1[0];
w1_t[1] |= s1[1];
w1_t[2] |= s1[2];
w1_t[3] |= s1[3];
w2_t[0] |= s2[0];
w2_t[1] |= s2[1];
w2_t[2] |= s2[2];
w2_t[3] |= s2[3];
w3_t[0] |= s3[0];
w3_t[1] |= s3[1];
w3_t[2] = out_salt_len * 8;
w3_t[3] = 0;
/** /**
* md5 * md5
@ -167,73 +156,73 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x c = MD5M_C; u32x c = MD5M_C;
u32x d = MD5M_D; u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
COMPARE_M_SIMD (a, d, c, b); COMPARE_M_SIMD (a, d, c, b);
} }
@ -264,18 +253,16 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -332,66 +319,57 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1);
const u32 out_len2 = out_len * 2;
/** /**
* append salt * append salt
*/ */
u32x s0[4]; u32x s0[4];
u32x s1[4];
u32x s2[4];
u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];
s0[2] = salt_buf0[2]; s0[2] = salt_buf0[2];
s0[3] = salt_buf0[3]; s0[3] = salt_buf0[3];
u32x s1[4];
s1[0] = salt_buf1[0]; s1[0] = salt_buf1[0];
s1[1] = salt_buf1[1]; s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2]; s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3]; s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
u32x s2[4]; switch_buffer_by_offset_le (s0, s1, s2, s3, out_len2);
s2[0] = 0; const u32 pw_salt_len = out_len2 + salt_len;
s2[1] = 0;
s2[2] = 0;
s2[3] = 0;
u32x s3[4]; w0[0] |= s0[0];
w0[1] |= s0[1];
s3[0] = 0; w0[2] |= s0[2];
s3[1] = 0; w0[3] |= s0[3];
s3[2] = 0; w1[0] |= s1[0];
s3[3] = 0; w1[1] |= s1[1];
w1[2] |= s1[2];
switch_buffer_by_offset_le (s0, s1, s2, s3, (out_len * 2)); w1[3] |= s1[3];
w2[0] |= s2[0];
const u32 out_salt_len = (out_len * 2) + salt_len; w2[1] |= s2[1];
w2[2] |= s2[2];
u32x w0_t[4]; w2[3] |= s2[3];
u32x w1_t[4]; w3[0] |= s3[0];
u32x w2_t[4]; w3[1] |= s3[1];
u32x w3_t[4]; w3[2] = pw_salt_len * 8;
w3[3] = 0;
make_unicode (w0, w0_t, w1_t);
make_unicode (w1, w2_t, w3_t);
w0_t[0] |= s0[0];
w0_t[1] |= s0[1];
w0_t[2] |= s0[2];
w0_t[3] |= s0[3];
w1_t[0] |= s1[0];
w1_t[1] |= s1[1];
w1_t[2] |= s1[2];
w1_t[3] |= s1[3];
w2_t[0] |= s2[0];
w2_t[1] |= s2[1];
w2_t[2] |= s2[2];
w2_t[3] |= s2[3];
w3_t[0] |= s3[0];
w3_t[1] |= s3[1];
w3_t[2] = out_salt_len * 8;
w3_t[3] = 0;
/** /**
* md5 * md5
@ -402,76 +380,76 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x c = MD5M_C; u32x c = MD5M_C;
u32x d = MD5M_D; u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
if (MATCHES_NONE_VS (a, search[0])) continue; if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
COMPARE_S_SIMD (a, d, c, b); COMPARE_S_SIMD (a, d, c, b);
} }

View File

@ -38,17 +38,17 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -99,14 +99,14 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -143,15 +143,15 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w1[0] = wordl1[0] | wordr1[0]; w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1]; w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2]; w1[2] = wordl1[2] | wordr1[2];
w1[3] = 0; w1[3] = wordl1[3] | wordr1[3];
w2[0] = 0; w2[0] = wordl2[0] | wordr2[0];
w2[1] = 0; w2[1] = wordl2[1] | wordr2[1];
w2[2] = 0; w2[2] = wordl2[2] | wordr2[2];
w2[3] = 0; w2[3] = wordl2[3] | wordr2[3];
w3[0] = 0; w3[0] = wordl3[0] | wordr3[0];
w3[1] = 0; w3[1] = wordl3[1] | wordr3[1];
w3[2] = 0; w3[2] = wordl3[2] | wordr3[2];
w3[3] = 0; w3[3] = wordl3[3] | wordr3[3];
make_unicode (w1, w2, w3); make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1); make_unicode (w0, w0, w1);
@ -162,10 +162,10 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* append salt * append salt
*/ */
u32x s0[4] = { 0 }; u32x s0[4];
u32x s1[4] = { 0 }; u32x s1[4];
u32x s2[4] = { 0 }; u32x s2[4];
u32x s3[4] = { 0 }; u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];
@ -175,6 +175,14 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s1[1] = salt_buf1[1]; s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2]; s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3]; s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len2); switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len2);
@ -302,17 +310,17 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -375,14 +383,14 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -419,15 +427,15 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w1[0] = wordl1[0] | wordr1[0]; w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1]; w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2]; w1[2] = wordl1[2] | wordr1[2];
w1[3] = 0; w1[3] = wordl1[3] | wordr1[3];
w2[0] = 0; w2[0] = wordl2[0] | wordr2[0];
w2[1] = 0; w2[1] = wordl2[1] | wordr2[1];
w2[2] = 0; w2[2] = wordl2[2] | wordr2[2];
w2[3] = 0; w2[3] = wordl2[3] | wordr2[3];
w3[0] = 0; w3[0] = wordl3[0] | wordr3[0];
w3[1] = 0; w3[1] = wordl3[1] | wordr3[1];
w3[2] = 0; w3[2] = wordl3[2] | wordr3[2];
w3[3] = 0; w3[3] = wordl3[3] | wordr3[3];
make_unicode (w1, w2, w3); make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1); make_unicode (w0, w0, w1);
@ -438,10 +446,10 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
* append salt * append salt
*/ */
u32x s0[4] = { 0 }; u32x s0[4];
u32x s1[4] = { 0 }; u32x s1[4];
u32x s2[4] = { 0 }; u32x s2[4];
u32x s3[4] = { 0 }; u32x s3[4];
s0[0] = salt_buf0[0]; s0[0] = salt_buf0[0];
s0[1] = salt_buf0[1]; s0[1] = salt_buf0[1];
@ -451,6 +459,14 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
s1[1] = salt_buf1[1]; s1[1] = salt_buf1[1];
s1[2] = salt_buf1[2]; s1[2] = salt_buf1[2];
s1[3] = salt_buf1[3]; s1[3] = salt_buf1[3];
s2[0] = salt_buf2[0];
s2[1] = salt_buf2[1];
s2[2] = salt_buf2[2];
s2[3] = salt_buf2[3];
s3[0] = salt_buf3[0];
s3[1] = salt_buf3[1];
s3[2] = salt_buf3[2];
s3[3] = salt_buf3[3];
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len2); switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len2);

View File

@ -74,6 +74,8 @@ static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
const u32 salt_len = salt_bufs[salt_pos].salt_len; const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32 pw_salt_len = pw_len + salt_len;
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
w[ 0] |= salt_buf0[0]; w[ 0] |= salt_buf0[0];
@ -90,13 +92,8 @@ static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
w[11] |= salt_buf2[3]; w[11] |= salt_buf2[3];
w[12] |= salt_buf3[0]; w[12] |= salt_buf3[0];
w[13] |= salt_buf3[1]; w[13] |= salt_buf3[1];
w[14] |= salt_buf3[2]; w[14] = pw_salt_len * 8;
w[15] |= salt_buf3[3]; w[15] = 0;
const u32 pw_salt_len = pw_len + salt_len;
w[14] = pw_salt_len * 8;
w[15] = 0;
/** /**
* base * base

View File

@ -39,18 +39,16 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -95,43 +93,37 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1);
const u32 out_len2 = out_len * 2;
/** /**
* prepend salt * prepend salt
*/ */
const u32 out_salt_len = (out_len * 2) + salt_len; const u32 out_salt_len = out_len2 + salt_len;
u32x w0_t[4]; switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
make_unicode (w0, w0_t, w1_t); w0[0] |= salt_buf0[0];
make_unicode (w1, w2_t, w3_t); w0[1] |= salt_buf0[1];
w0[2] |= salt_buf0[2];
w0[3] |= salt_buf0[3];
w1[0] |= salt_buf1[0];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
w2[0] |= salt_buf2[0];
w2[1] |= salt_buf2[1];
w2[2] |= salt_buf2[2];
w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1];
w3[2] = out_salt_len * 8;
w3[3] = 0;
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len); append_0x80_4x4 (w0, w1, w2, w3, out_salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
w3_t[2] = out_salt_len * 8;
w3_t[3] = 0;
/** /**
* md5 * md5
@ -142,73 +134,73 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x c = MD5M_C; u32x c = MD5M_C;
u32x d = MD5M_D; u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
COMPARE_M_SIMD (a, d, c, b); COMPARE_M_SIMD (a, d, c, b);
} }
@ -239,18 +231,16 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -307,43 +297,37 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1);
const u32 out_len2 = out_len * 2;
/** /**
* prepend salt * prepend salt
*/ */
const u32 out_salt_len = (out_len * 2) + salt_len; const u32 out_salt_len = out_len2 + salt_len;
u32x w0_t[4]; switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
make_unicode (w0, w0_t, w1_t); w0[0] |= salt_buf0[0];
make_unicode (w1, w2_t, w3_t); w0[1] |= salt_buf0[1];
w0[2] |= salt_buf0[2];
w0[3] |= salt_buf0[3];
w1[0] |= salt_buf1[0];
w1[1] |= salt_buf1[1];
w1[2] |= salt_buf1[2];
w1[3] |= salt_buf1[3];
w2[0] |= salt_buf2[0];
w2[1] |= salt_buf2[1];
w2[2] |= salt_buf2[2];
w2[3] |= salt_buf2[3];
w3[0] |= salt_buf3[0];
w3[1] |= salt_buf3[1];
w3[2] = out_salt_len * 8;
w3[3] = 0;
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, salt_len); append_0x80_4x4 (w0, w1, w2, w3, out_salt_len);
w0_t[0] |= salt_buf0[0];
w0_t[1] |= salt_buf0[1];
w0_t[2] |= salt_buf0[2];
w0_t[3] |= salt_buf0[3];
w1_t[0] |= salt_buf1[0];
w1_t[1] |= salt_buf1[1];
w1_t[2] |= salt_buf1[2];
w1_t[3] |= salt_buf1[3];
w2_t[0] |= salt_buf2[0];
w2_t[1] |= salt_buf2[1];
w2_t[2] |= salt_buf2[2];
w2_t[3] |= salt_buf2[3];
w3_t[0] |= salt_buf3[0];
w3_t[1] |= salt_buf3[1];
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
w3_t[2] = out_salt_len * 8;
w3_t[3] = 0;
/** /**
* md5 * md5
@ -354,76 +338,76 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x c = MD5M_C; u32x c = MD5M_C;
u32x d = MD5M_D; u32x d = MD5M_D;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
if (MATCHES_NONE_VS (a, search[0])) continue; if (MATCHES_NONE_VS (a, search[0])) continue;
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
COMPARE_S_SIMD (a, d, c, b); COMPARE_S_SIMD (a, d, c, b);
} }

View File

@ -38,17 +38,17 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -99,14 +99,14 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -143,15 +143,15 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w1[0] = wordl1[0] | wordr1[0]; w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1]; w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2]; w1[2] = wordl1[2] | wordr1[2];
w1[3] = 0; w1[3] = wordl1[3] | wordr1[3];
w2[0] = 0; w2[0] = wordl2[0] | wordr2[0];
w2[1] = 0; w2[1] = wordl2[1] | wordr2[1];
w2[2] = 0; w2[2] = wordl2[2] | wordr2[2];
w2[3] = 0; w2[3] = wordl2[3] | wordr2[3];
w3[0] = 0; w3[0] = wordl3[0] | wordr3[0];
w3[1] = 0; w3[1] = wordl3[1] | wordr3[1];
w3[2] = 0; w3[2] = wordl3[2] | wordr3[2];
w3[3] = 0; w3[3] = wordl3[3] | wordr3[3];
make_unicode (w1, w2, w3); make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1); make_unicode (w0, w0, w1);
@ -288,17 +288,17 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -361,14 +361,14 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -405,15 +405,15 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w1[0] = wordl1[0] | wordr1[0]; w1[0] = wordl1[0] | wordr1[0];
w1[1] = wordl1[1] | wordr1[1]; w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2]; w1[2] = wordl1[2] | wordr1[2];
w1[3] = 0; w1[3] = wordl1[3] | wordr1[3];
w2[0] = 0; w2[0] = wordl2[0] | wordr2[0];
w2[1] = 0; w2[1] = wordl2[1] | wordr2[1];
w2[2] = 0; w2[2] = wordl2[2] | wordr2[2];
w2[3] = 0; w2[3] = wordl2[3] | wordr2[3];
w3[0] = 0; w3[0] = wordl3[0] | wordr3[0];
w3[1] = 0; w3[1] = wordl3[1] | wordr3[1];
w3[2] = 0; w3[2] = wordl3[2] | wordr3[2];
w3[3] = 0; w3[3] = wordl3[3] | wordr3[3];
make_unicode (w1, w2, w3); make_unicode (w1, w2, w3);
make_unicode (w0, w0, w1); make_unicode (w0, w0, w1);

View File

@ -63,101 +63,60 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* prepend salt * prepend salt
*/ */
u32 w0_t[4]; const u32 w0l = w0[0];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = w0[0]; switch_buffer_by_offset_le_S (w0, w1, w2, w3, salt_len);
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len); w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w0_t[0] |= salt_buf0[0]; w0[2] |= salt_buf0[2];
w0_t[1] |= salt_buf0[1]; w0[3] |= salt_buf0[3];
w0_t[2] |= salt_buf0[2]; w1[0] |= salt_buf1[0];
w0_t[3] |= salt_buf0[3]; w1[1] |= salt_buf1[1];
w1_t[0] |= salt_buf1[0]; w1[2] |= salt_buf1[2];
w1_t[1] |= salt_buf1[1]; w1[3] |= salt_buf1[3];
w1_t[2] |= salt_buf1[2]; w2[0] |= salt_buf2[0];
w1_t[3] |= salt_buf1[3]; w2[1] |= salt_buf2[1];
w2_t[0] |= salt_buf2[0]; w2[2] |= salt_buf2[2];
w2_t[1] |= salt_buf2[1]; w2[3] |= salt_buf2[3];
w2_t[2] |= salt_buf2[2]; w3[0] |= salt_buf3[0];
w2_t[3] |= salt_buf2[3]; w3[1] |= salt_buf3[1];
w3_t[0] |= salt_buf3[0]; w3[2] = pw_salt_len * 8;
w3_t[1] |= salt_buf3[1]; w3[3] = 0;
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
/** /**
* loop * loop
*/ */
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{ {
const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r; const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_le (wx, w0lr, salt_len);
u32x w0_t[4]; u32x w0_t[4];
u32x w1_t[4]; u32x w1_t[4];
u32x w2_t[4]; u32x w2_t[4];
u32x w3_t[4]; u32x w3_t[4];
w0_t[0] = wx[ 0]; w0_t[0] = w0[0];
w0_t[1] = wx[ 1]; w0_t[1] = w0[1];
w0_t[2] = wx[ 2]; w0_t[2] = w0[2];
w0_t[3] = wx[ 3]; w0_t[3] = w0[3];
w1_t[0] = wx[ 4]; w1_t[0] = w1[0];
w1_t[1] = wx[ 5]; w1_t[1] = w1[1];
w1_t[2] = wx[ 6]; w1_t[2] = w1[2];
w1_t[3] = wx[ 7]; w1_t[3] = w1[3];
w2_t[0] = wx[ 8]; w2_t[0] = w2[0];
w2_t[1] = wx[ 9]; w2_t[1] = w2[1];
w2_t[2] = wx[10]; w2_t[2] = w2[2];
w2_t[3] = wx[11]; w2_t[3] = w2[3];
w3_t[0] = wx[12]; w3_t[0] = w3[0];
w3_t[1] = wx[13]; w3_t[1] = w3[1];
w3_t[2] = pw_salt_len * 8; w3_t[2] = w3[2];
w3_t[3] = 0; w3_t[3] = w3[3];
overwrite_at_le_4x4 (w0_t, w1_t, w2_t, w3_t, w0lr, salt_len);
/** /**
* md5 * md5
@ -295,101 +254,60 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
* prepend salt * prepend salt
*/ */
u32 w0_t[4]; const u32 w0l = w0[0];
u32 w1_t[4];
u32 w2_t[4];
u32 w3_t[4];
w0_t[0] = w0[0]; switch_buffer_by_offset_le_S (w0, w1, w2, w3, salt_len);
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
w2_t[0] = w2[0];
w2_t[1] = w2[1];
w2_t[2] = w2[2];
w2_t[3] = w2[3];
w3_t[0] = w3[0];
w3_t[1] = w3[1];
w3_t[2] = w3[2];
w3_t[3] = w3[3];
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len); w0[0] |= salt_buf0[0];
w0[1] |= salt_buf0[1];
w0_t[0] |= salt_buf0[0]; w0[2] |= salt_buf0[2];
w0_t[1] |= salt_buf0[1]; w0[3] |= salt_buf0[3];
w0_t[2] |= salt_buf0[2]; w1[0] |= salt_buf1[0];
w0_t[3] |= salt_buf0[3]; w1[1] |= salt_buf1[1];
w1_t[0] |= salt_buf1[0]; w1[2] |= salt_buf1[2];
w1_t[1] |= salt_buf1[1]; w1[3] |= salt_buf1[3];
w1_t[2] |= salt_buf1[2]; w2[0] |= salt_buf2[0];
w1_t[3] |= salt_buf1[3]; w2[1] |= salt_buf2[1];
w2_t[0] |= salt_buf2[0]; w2[2] |= salt_buf2[2];
w2_t[1] |= salt_buf2[1]; w2[3] |= salt_buf2[3];
w2_t[2] |= salt_buf2[2]; w3[0] |= salt_buf3[0];
w2_t[3] |= salt_buf2[3]; w3[1] |= salt_buf3[1];
w3_t[0] |= salt_buf3[0]; w3[2] = pw_salt_len * 8;
w3_t[1] |= salt_buf3[1]; w3[3] = 0;
w3_t[2] |= salt_buf3[2];
w3_t[3] |= salt_buf3[3];
/** /**
* loop * loop
*/ */
u32 w0l = w0[0];
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
{ {
const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0r = ix_create_bft (bfs_buf, il_pos);
const u32x w0lr = w0l | w0r; const u32x w0lr = w0l | w0r;
u32x wx[16];
wx[ 0] = w0_t[0];
wx[ 1] = w0_t[1];
wx[ 2] = w0_t[2];
wx[ 3] = w0_t[3];
wx[ 4] = w1_t[0];
wx[ 5] = w1_t[1];
wx[ 6] = w1_t[2];
wx[ 7] = w1_t[3];
wx[ 8] = w2_t[0];
wx[ 9] = w2_t[1];
wx[10] = w2_t[2];
wx[11] = w2_t[3];
wx[12] = w3_t[0];
wx[13] = w3_t[1];
wx[14] = w3_t[2];
wx[15] = w3_t[3];
overwrite_at_le (wx, w0lr, salt_len);
u32x w0_t[4]; u32x w0_t[4];
u32x w1_t[4]; u32x w1_t[4];
u32x w2_t[4]; u32x w2_t[4];
u32x w3_t[4]; u32x w3_t[4];
w0_t[0] = wx[ 0]; w0_t[0] = w0[0];
w0_t[1] = wx[ 1]; w0_t[1] = w0[1];
w0_t[2] = wx[ 2]; w0_t[2] = w0[2];
w0_t[3] = wx[ 3]; w0_t[3] = w0[3];
w1_t[0] = wx[ 4]; w1_t[0] = w1[0];
w1_t[1] = wx[ 5]; w1_t[1] = w1[1];
w1_t[2] = wx[ 6]; w1_t[2] = w1[2];
w1_t[3] = wx[ 7]; w1_t[3] = w1[3];
w2_t[0] = wx[ 8]; w2_t[0] = w2[0];
w2_t[1] = wx[ 9]; w2_t[1] = w2[1];
w2_t[2] = wx[10]; w2_t[2] = w2[2];
w2_t[3] = wx[11]; w2_t[3] = w2[3];
w3_t[0] = wx[12]; w3_t[0] = w3[0];
w3_t[1] = wx[13]; w3_t[1] = w3[1];
w3_t[2] = pw_salt_len * 8; w3_t[2] = w3[2];
w3_t[3] = 0; w3_t[3] = w3[3];
overwrite_at_le_4x4 (w0_t, w1_t, w2_t, w3_t, w0lr, salt_len);
/** /**
* md5 * md5

View File

@ -222,18 +222,16 @@ __kernel void m00050_m04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -282,59 +280,31 @@ __kernel void m00050_m04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
* pads * pads
*/ */
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32x ipad[4]; u32x ipad[4];
u32x opad[4]; u32x opad[4];
hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); hmac_md5_pad (w0, w1, w2, w3, ipad, opad);
w0_t[0] = salt_buf0[0]; w0[0] = salt_buf0[0];
w0_t[1] = salt_buf0[1]; w0[1] = salt_buf0[1];
w0_t[2] = salt_buf0[2]; w0[2] = salt_buf0[2];
w0_t[3] = salt_buf0[3]; w0[3] = salt_buf0[3];
w1_t[0] = salt_buf1[0]; w1[0] = salt_buf1[0];
w1_t[1] = salt_buf1[1]; w1[1] = salt_buf1[1];
w1_t[2] = salt_buf1[2]; w1[2] = salt_buf1[2];
w1_t[3] = salt_buf1[3]; w1[3] = salt_buf1[3];
w2_t[0] = salt_buf2[0]; w2[0] = salt_buf2[0];
w2_t[1] = salt_buf2[1]; w2[1] = salt_buf2[1];
w2_t[2] = salt_buf2[2]; w2[2] = salt_buf2[2];
w2_t[3] = salt_buf2[3]; w2[3] = salt_buf2[3];
w3_t[0] = salt_buf3[0]; w3[0] = salt_buf3[0];
w3_t[1] = salt_buf3[1]; w3[1] = salt_buf3[1];
w3_t[2] = (64 + salt_len) * 8; w3[2] = (64 + salt_len) * 8;
w3_t[3] = 0; w3[3] = 0;
u32x digest[4]; u32x digest[4];
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); hmac_md5_run (w0, w1, w2, w3, ipad, opad, digest);
COMPARE_M_SIMD (digest[0], digest[3], digest[2], digest[1]); COMPARE_M_SIMD (digest[0], digest[3], digest[2], digest[1]);
} }
@ -365,18 +335,16 @@ __kernel void m00050_s04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -437,59 +405,31 @@ __kernel void m00050_s04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
* pads * pads
*/ */
u32x w0_t[4];
w0_t[0] = w0[0];
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
u32x w1_t[4];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x w2_t[4];
w2_t[0] = 0;
w2_t[1] = 0;
w2_t[2] = 0;
w2_t[3] = 0;
u32x w3_t[4];
w3_t[0] = 0;
w3_t[1] = 0;
w3_t[2] = 0;
w3_t[3] = 0;
u32x ipad[4]; u32x ipad[4];
u32x opad[4]; u32x opad[4];
hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); hmac_md5_pad (w0, w1, w2, w3, ipad, opad);
w0_t[0] = salt_buf0[0]; w0[0] = salt_buf0[0];
w0_t[1] = salt_buf0[1]; w0[1] = salt_buf0[1];
w0_t[2] = salt_buf0[2]; w0[2] = salt_buf0[2];
w0_t[3] = salt_buf0[3]; w0[3] = salt_buf0[3];
w1_t[0] = salt_buf1[0]; w1[0] = salt_buf1[0];
w1_t[1] = salt_buf1[1]; w1[1] = salt_buf1[1];
w1_t[2] = salt_buf1[2]; w1[2] = salt_buf1[2];
w1_t[3] = salt_buf1[3]; w1[3] = salt_buf1[3];
w2_t[0] = salt_buf2[0]; w2[0] = salt_buf2[0];
w2_t[1] = salt_buf2[1]; w2[1] = salt_buf2[1];
w2_t[2] = salt_buf2[2]; w2[2] = salt_buf2[2];
w2_t[3] = salt_buf2[3]; w2[3] = salt_buf2[3];
w3_t[0] = salt_buf3[0]; w3[0] = salt_buf3[0];
w3_t[1] = salt_buf3[1]; w3[1] = salt_buf3[1];
w3_t[2] = (64 + salt_len) * 8; w3[2] = (64 + salt_len) * 8;
w3_t[3] = 0; w3[3] = 0;
u32x digest[4]; u32x digest[4];
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); hmac_md5_run (w0, w1, w2, w3, ipad, opad, digest);
COMPARE_S_SIMD (digest[0], digest[3], digest[2], digest[1]); COMPARE_S_SIMD (digest[0], digest[3], digest[2], digest[1]);
} }

View File

@ -219,17 +219,17 @@ __kernel void m00050_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -280,14 +280,14 @@ __kernel void m00050_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -392,17 +392,17 @@ __kernel void m00050_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -465,14 +465,14 @@ __kernel void m00050_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };

View File

@ -257,32 +257,26 @@ static void m00050m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
*/ */
u32x w0_t[4]; u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0lr; w0_t[0] = w0lr;
w0_t[1] = w0[1]; w0_t[1] = w0[1];
w0_t[2] = w0[2]; w0_t[2] = w0[2];
w0_t[3] = w0[3]; w0_t[3] = w0[3];
u32x w1_t[4];
w1_t[0] = w1[0]; w1_t[0] = w1[0];
w1_t[1] = w1[1]; w1_t[1] = w1[1];
w1_t[2] = w1[2]; w1_t[2] = w1[2];
w1_t[3] = w1[3]; w1_t[3] = w1[3];
u32x w2_t[4];
w2_t[0] = w2[0]; w2_t[0] = w2[0];
w2_t[1] = w2[1]; w2_t[1] = w2[1];
w2_t[2] = w2[2]; w2_t[2] = w2[2];
w2_t[3] = w2[3]; w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0]; w3_t[0] = w3[0];
w3_t[1] = w3[1]; w3_t[1] = w3[1];
w3_t[2] = 0; w3_t[2] = w3[2];
w3_t[3] = 0; w3_t[3] = w3[3];
u32x ipad[4]; u32x ipad[4];
u32x opad[4]; u32x opad[4];
@ -380,32 +374,26 @@ static void m00050s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
*/ */
u32x w0_t[4]; u32x w0_t[4];
u32x w1_t[4];
u32x w2_t[4];
u32x w3_t[4];
w0_t[0] = w0lr; w0_t[0] = w0lr;
w0_t[1] = w0[1]; w0_t[1] = w0[1];
w0_t[2] = w0[2]; w0_t[2] = w0[2];
w0_t[3] = w0[3]; w0_t[3] = w0[3];
u32x w1_t[4];
w1_t[0] = w1[0]; w1_t[0] = w1[0];
w1_t[1] = w1[1]; w1_t[1] = w1[1];
w1_t[2] = w1[2]; w1_t[2] = w1[2];
w1_t[3] = w1[3]; w1_t[3] = w1[3];
u32x w2_t[4];
w2_t[0] = w2[0]; w2_t[0] = w2[0];
w2_t[1] = w2[1]; w2_t[1] = w2[1];
w2_t[2] = w2[2]; w2_t[2] = w2[2];
w2_t[3] = w2[3]; w2_t[3] = w2[3];
u32x w3_t[4];
w3_t[0] = w3[0]; w3_t[0] = w3[0];
w3_t[1] = w3[1]; w3_t[1] = w3[1];
w3_t[2] = 0; w3_t[2] = w3[2];
w3_t[3] = 0; w3_t[3] = w3[3];
u32x ipad[4]; u32x ipad[4];
u32x opad[4]; u32x opad[4];

View File

@ -222,18 +222,16 @@ __kernel void m00060_m04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;
@ -361,18 +359,16 @@ __kernel void m00060_s04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pw_buf0[4]; u32 pw_buf0[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
u32 pw_buf1[4]; u32 pw_buf1[4];
pw_buf1[0] = pws[gid].i[ 4]; pw_buf0[0] = pws[gid].i[0];
pw_buf1[1] = pws[gid].i[ 5]; pw_buf0[1] = pws[gid].i[1];
pw_buf1[2] = pws[gid].i[ 6]; pw_buf0[2] = pws[gid].i[2];
pw_buf1[3] = pws[gid].i[ 7]; pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len; const u32 pw_len = pws[gid].pw_len;

View File

@ -219,17 +219,17 @@ __kernel void m00060_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -311,14 +311,14 @@ __kernel void m00060_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };
@ -397,17 +397,17 @@ __kernel void m00060_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
if (gid >= gid_max) return; if (gid >= gid_max) return;
u32 pws0[4] = { 0 }; u32 pw_buf0[4];
u32 pws1[4] = { 0 }; u32 pw_buf1[4];
pws0[0] = pws[gid].i[0]; pw_buf0[0] = pws[gid].i[0];
pws0[1] = pws[gid].i[1]; pw_buf0[1] = pws[gid].i[1];
pws0[2] = pws[gid].i[2]; pw_buf0[2] = pws[gid].i[2];
pws0[3] = pws[gid].i[3]; pw_buf0[3] = pws[gid].i[3];
pws1[0] = pws[gid].i[4]; pw_buf1[0] = pws[gid].i[4];
pws1[1] = pws[gid].i[5]; pw_buf1[1] = pws[gid].i[5];
pws1[2] = pws[gid].i[6]; pw_buf1[2] = pws[gid].i[6];
pws1[3] = pws[gid].i[7]; pw_buf1[3] = pws[gid].i[7];
const u32 pw_l_len = pws[gid].pw_len; const u32 pw_l_len = pws[gid].pw_len;
@ -501,14 +501,14 @@ __kernel void m00060_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
u32x wordl2[4] = { 0 }; u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 }; u32x wordl3[4] = { 0 };
wordl0[0] = pws0[0]; wordl0[0] = pw_buf0[0];
wordl0[1] = pws0[1]; wordl0[1] = pw_buf0[1];
wordl0[2] = pws0[2]; wordl0[2] = pw_buf0[2];
wordl0[3] = pws0[3]; wordl0[3] = pw_buf0[3];
wordl1[0] = pws1[0]; wordl1[0] = pw_buf1[0];
wordl1[1] = pws1[1]; wordl1[1] = pw_buf1[1];
wordl1[2] = pws1[2]; wordl1[2] = pw_buf1[2];
wordl1[3] = pws1[3]; wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 }; u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 }; u32x wordr1[4] = { 0 };