diff --git a/OpenCL/m05400_a0.cl b/OpenCL/m05400_a0.cl index d94ee1e09..ddd890050 100644 --- a/OpenCL/m05400_a0.cl +++ b/OpenCL/m05400_a0.cl @@ -276,33 +276,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - u32 salt_buf0[4]; + __local u32 w_s[16]; - salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; + if (lid < 16) + { + w_s[lid] = swap32 (ikepsk_bufs[salt_pos].nr_buf[lid]); + } - salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; + barrier (CLK_LOCAL_MEM_FENCE); __local u32 s_msg_buf[128]; @@ -388,20 +369,20 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; + w0_t[0] = w_s[ 0]; + w0_t[1] = w_s[ 1]; + w0_t[2] = w_s[ 2]; + w0_t[3] = w_s[ 3]; + w1_t[0] = w_s[ 4]; + w1_t[1] = w_s[ 5]; + w1_t[2] = w_s[ 6]; + w1_t[3] = w_s[ 7]; + w2_t[0] = w_s[ 8]; + w2_t[1] = w_s[ 9]; + w2_t[2] = w_s[10]; + w2_t[3] = w_s[11]; + w3_t[0] = w_s[12]; + w3_t[1] = w_s[13]; w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; @@ -526,33 +507,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - u32 salt_buf0[4]; + __local u32 w_s[16]; - salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; + if (lid < 16) + { + w_s[lid] = swap32 (ikepsk_bufs[salt_pos].nr_buf[lid]); + } - salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; + barrier (CLK_LOCAL_MEM_FENCE); __local u32 s_msg_buf[128]; @@ -650,20 +612,20 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; + w0_t[0] = w_s[ 0]; + w0_t[1] = w_s[ 1]; + w0_t[2] = w_s[ 2]; + w0_t[3] = w_s[ 3]; + w1_t[0] = w_s[ 4]; + w1_t[1] = w_s[ 5]; + w1_t[2] = w_s[ 6]; + w1_t[3] = w_s[ 7]; + w2_t[0] = w_s[ 8]; + w2_t[1] = w_s[ 9]; + w2_t[2] = w_s[10]; + w2_t[3] = w_s[11]; + w3_t[0] = w_s[12]; + w3_t[1] = w_s[13]; w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; diff --git a/OpenCL/m05400_a1.cl b/OpenCL/m05400_a1.cl index 4a6e65eac..e2ee82c53 100644 --- a/OpenCL/m05400_a1.cl +++ b/OpenCL/m05400_a1.cl @@ -293,33 +293,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - u32 salt_buf0[4]; + __local u32 w_s[16]; - salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; + if (lid < 16) + { + w_s[lid] = swap32 (ikepsk_bufs[salt_pos].nr_buf[lid]); + } - salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; + barrier (CLK_LOCAL_MEM_FENCE); __local u32 s_msg_buf[128]; @@ -440,20 +421,20 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; + w0_t[0] = w_s[ 0]; + w0_t[1] = w_s[ 1]; + w0_t[2] = w_s[ 2]; + w0_t[3] = w_s[ 3]; + w1_t[0] = w_s[ 4]; + w1_t[1] = w_s[ 5]; + w1_t[2] = w_s[ 6]; + w1_t[3] = w_s[ 7]; + w2_t[0] = w_s[ 8]; + w2_t[1] = w_s[ 9]; + w2_t[2] = w_s[10]; + w2_t[3] = w_s[11]; + w3_t[0] = w_s[12]; + w3_t[1] = w_s[13]; w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; @@ -597,33 +578,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - u32 salt_buf0[4]; + __local u32 w_s[16]; - salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; + if (lid < 16) + { + w_s[lid] = swap32 (ikepsk_bufs[salt_pos].nr_buf[lid]); + } - salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; + barrier (CLK_LOCAL_MEM_FENCE); __local u32 s_msg_buf[128]; @@ -756,20 +718,20 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; + w0_t[0] = w_s[ 0]; + w0_t[1] = w_s[ 1]; + w0_t[2] = w_s[ 2]; + w0_t[3] = w_s[ 3]; + w1_t[0] = w_s[ 4]; + w1_t[1] = w_s[ 5]; + w1_t[2] = w_s[ 6]; + w1_t[3] = w_s[ 7]; + w2_t[0] = w_s[ 8]; + w2_t[1] = w_s[ 9]; + w2_t[2] = w_s[10]; + w2_t[3] = w_s[11]; + w3_t[0] = w_s[12]; + w3_t[1] = w_s[13]; w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; diff --git a/OpenCL/m05400_a3.cl b/OpenCL/m05400_a3.cl index 8f5a62ad8..d5664b1d2 100644 --- a/OpenCL/m05400_a3.cl +++ b/OpenCL/m05400_a3.cl @@ -237,7 +237,7 @@ static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[ sha1_transform (w0, w1, w2, w3, digest); } -static void m05400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) +static void m05400m (__local u32 w_s[16], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) { /** * modifier @@ -253,34 +253,6 @@ static void m05400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - u32 salt_buf0[4]; - - salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - /** * loop */ @@ -330,20 +302,20 @@ static void m05400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; + w0_t[0] = w_s[ 0]; + w0_t[1] = w_s[ 1]; + w0_t[2] = w_s[ 2]; + w0_t[3] = w_s[ 3]; + w1_t[0] = w_s[ 4]; + w1_t[1] = w_s[ 5]; + w1_t[2] = w_s[ 6]; + w1_t[3] = w_s[ 7]; + w2_t[0] = w_s[ 8]; + w2_t[1] = w_s[ 9]; + w2_t[2] = w_s[10]; + w2_t[3] = w_s[11]; + w3_t[0] = w_s[12]; + w3_t[1] = w_s[13]; w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; @@ -423,7 +395,7 @@ static void m05400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le } } -static void m05400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) +static void m05400s (__local u32 w_s[16], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) { /** * modifier @@ -439,34 +411,6 @@ static void m05400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - u32 salt_buf0[4]; - - salt_buf0[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - /** * digest */ @@ -528,20 +472,20 @@ static void m05400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; + w0_t[0] = w_s[ 0]; + w0_t[1] = w_s[ 1]; + w0_t[2] = w_s[ 2]; + w0_t[3] = w_s[ 3]; + w1_t[0] = w_s[ 4]; + w1_t[1] = w_s[ 5]; + w1_t[2] = w_s[ 6]; + w1_t[3] = w_s[ 7]; + w2_t[0] = w_s[ 8]; + w2_t[1] = w_s[ 9]; + w2_t[2] = w_s[10]; + w2_t[3] = w_s[11]; + w3_t[0] = w_s[12]; + w3_t[1] = w_s[13]; w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; @@ -669,6 +613,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo * s_msg */ + __local u32 w_s[16]; + + if (lid < 16) + { + w_s[lid] = swap32 (ikepsk_bufs[salt_pos].nr_buf[lid]); + } + + barrier (CLK_LOCAL_MEM_FENCE); + __local u32 s_msg_buf[128]; const u32 lid2 = lid * 2; @@ -684,7 +637,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); + m05400m (w_s, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -735,6 +688,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m08 (__glo * s_msg */ + __local u32 w_s[16]; + + if (lid < 16) + { + w_s[lid] = swap32 (ikepsk_bufs[salt_pos].nr_buf[lid]); + } + + barrier (CLK_LOCAL_MEM_FENCE); + __local u32 s_msg_buf[128]; const u32 lid2 = lid * 2; @@ -750,7 +712,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m08 (__glo * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); + m05400m (w_s, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -801,6 +763,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m16 (__glo * s_msg */ + __local u32 w_s[16]; + + if (lid < 1) + { + w_s[ 0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); + w_s[ 1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); + w_s[ 2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); + w_s[ 3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); + w_s[ 4] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); + w_s[ 5] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); + w_s[ 6] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); + w_s[ 7] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); + w_s[ 8] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); + w_s[ 9] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); + w_s[10] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); + w_s[11] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); + w_s[12] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); + w_s[13] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); + w_s[14] = 0; + w_s[15] = 0; + } + __local u32 s_msg_buf[128]; const u32 lid2 = lid * 2; @@ -816,7 +800,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m16 (__glo * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); + m05400m (w_s, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -867,6 +851,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo * s_msg_buf */ + __local u32 w_s[16]; + + if (lid < 1) + { + w_s[ 0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); + w_s[ 1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); + w_s[ 2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); + w_s[ 3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); + w_s[ 4] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); + w_s[ 5] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); + w_s[ 6] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); + w_s[ 7] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); + w_s[ 8] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); + w_s[ 9] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); + w_s[10] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); + w_s[11] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); + w_s[12] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); + w_s[13] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); + w_s[14] = 0; + w_s[15] = 0; + } + __local u32 s_msg_buf[128]; const u32 lid2 = lid * 2; @@ -882,7 +888,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); + m05400s (w_s, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -933,6 +939,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s08 (__glo * s_msg_buf */ + __local u32 w_s[16]; + + if (lid < 1) + { + w_s[ 0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); + w_s[ 1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); + w_s[ 2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); + w_s[ 3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); + w_s[ 4] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); + w_s[ 5] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); + w_s[ 6] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); + w_s[ 7] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); + w_s[ 8] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); + w_s[ 9] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); + w_s[10] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); + w_s[11] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); + w_s[12] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); + w_s[13] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); + w_s[14] = 0; + w_s[15] = 0; + } + __local u32 s_msg_buf[128]; const u32 lid2 = lid * 2; @@ -948,7 +976,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s08 (__glo * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); + m05400s (w_s, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -999,6 +1027,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s16 (__glo * s_msg_buf */ + __local u32 w_s[16]; + + if (lid < 1) + { + w_s[ 0] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 0]); + w_s[ 1] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 1]); + w_s[ 2] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 2]); + w_s[ 3] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 3]); + w_s[ 4] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 4]); + w_s[ 5] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 5]); + w_s[ 6] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 6]); + w_s[ 7] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 7]); + w_s[ 8] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 8]); + w_s[ 9] = swap32 (ikepsk_bufs[salt_pos].nr_buf[ 9]); + w_s[10] = swap32 (ikepsk_bufs[salt_pos].nr_buf[10]); + w_s[11] = swap32 (ikepsk_bufs[salt_pos].nr_buf[11]); + w_s[12] = swap32 (ikepsk_bufs[salt_pos].nr_buf[12]); + w_s[13] = swap32 (ikepsk_bufs[salt_pos].nr_buf[13]); + w_s[14] = 0; + w_s[15] = 0; + } + __local u32 s_msg_buf[128]; const u32 lid2 = lid * 2; @@ -1014,5 +1064,5 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s16 (__glo * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); + m05400s (w_s, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); }