From 9ce30defcbba758382460bd01e7ce1821e42264b Mon Sep 17 00:00:00 2001 From: tweqx Date: Sat, 21 May 2022 19:32:39 +0200 Subject: [PATCH] Don't apply the salt in the a3 BLAKE2b($pass.$salt) optimized OpenCL code --- OpenCL/m00610_a3-optimized.cl | 179 ++++------------------------------ 1 file changed, 18 insertions(+), 161 deletions(-) diff --git a/OpenCL/m00610_a3-optimized.cl b/OpenCL/m00610_a3-optimized.cl index 7a406b40e..7402791ef 100644 --- a/OpenCL/m00610_a3-optimized.cl +++ b/OpenCL/m00610_a3-optimized.cl @@ -20,38 +20,6 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO * modifiers are taken from args */ - /** - * salt - */ - - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - u32 salt_buf3[4]; - - salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; - salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; - salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; - salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; - salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; - salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; - - const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - /** * loop */ @@ -63,59 +31,20 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = w0x; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; - w3[0] = w[12]; - w3[1] = w[13]; - w3[2] = w[14]; - w3[3] = w[15]; - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - w2[0] |= salt_buf2[0]; - w2[1] |= salt_buf2[1]; - w2[2] |= salt_buf2[2]; - w2[3] |= salt_buf2[3]; - w3[0] |= salt_buf3[0]; - w3[1] |= salt_buf3[1]; - w3[2] |= salt_buf3[2]; - w3[3] |= salt_buf3[3]; - /** * blake2b */ u64x m[16]; - m[ 0] = hl32_to_64 (w0[1], w0[0]); - m[ 1] = hl32_to_64 (w0[3], w0[2]); - m[ 2] = hl32_to_64 (w1[1], w1[0]); - m[ 3] = hl32_to_64 (w1[3], w1[2]); - m[ 4] = hl32_to_64 (w2[1], w2[0]); - m[ 5] = hl32_to_64 (w2[3], w2[2]); - m[ 6] = hl32_to_64 (w3[1], w3[0]); - m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 0] = hl32_to_64 (w[ 1], w0x ); + m[ 1] = hl32_to_64 (w[ 3], w[ 2]); + m[ 2] = hl32_to_64 (w[ 5], w[ 4]); + m[ 3] = hl32_to_64 (w[ 7], w[ 6]); + m[ 4] = hl32_to_64 (w[ 9], w[ 8]); + m[ 5] = hl32_to_64 (w[11], w[10]); + m[ 6] = hl32_to_64 (w[13], w[12]); + m[ 7] = hl32_to_64 (w[15], w[14]); m[ 8] = 0; m[ 9] = 0; m[10] = 0; @@ -136,7 +65,7 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO h[6] = BLAKE2B_IV_06; h[7] = BLAKE2B_IV_07; - blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + blake2b_transform_vector (h, m, pw_len, BLAKE2B_FINAL); const u32x r0 = h32_from_64 (h[0]); const u32x r1 = l32_from_64 (h[0]); @@ -165,38 +94,6 @@ DECLSPEC void m00610s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] }; - /** - * salt - */ - - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - u32 salt_buf3[4]; - - salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; - salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; - salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; - salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; - salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; - salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; - - const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - /** * loop */ @@ -208,59 +105,20 @@ DECLSPEC void m00610s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = w0x; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; - w3[0] = w[12]; - w3[1] = w[13]; - w3[2] = w[14]; - w3[3] = w[15]; - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - w2[0] |= salt_buf2[0]; - w2[1] |= salt_buf2[1]; - w2[2] |= salt_buf2[2]; - w2[3] |= salt_buf2[3]; - w3[0] |= salt_buf3[0]; - w3[1] |= salt_buf3[1]; - w3[2] |= salt_buf3[2]; - w3[3] |= salt_buf3[3]; - /** * blake2b */ u64x m[16]; - m[ 0] = hl32_to_64 (w0[1], w0[0]); - m[ 1] = hl32_to_64 (w0[3], w0[2]); - m[ 2] = hl32_to_64 (w1[1], w1[0]); - m[ 3] = hl32_to_64 (w1[3], w1[2]); - m[ 4] = hl32_to_64 (w2[1], w2[0]); - m[ 5] = hl32_to_64 (w2[3], w2[2]); - m[ 6] = hl32_to_64 (w3[1], w3[0]); - m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 0] = hl32_to_64 (w[ 1], w0x ); + m[ 1] = hl32_to_64 (w[ 3], w[ 2]); + m[ 2] = hl32_to_64 (w[ 5], w[ 4]); + m[ 3] = hl32_to_64 (w[ 7], w[ 6]); + m[ 4] = hl32_to_64 (w[ 9], w[ 8]); + m[ 5] = hl32_to_64 (w[11], w[10]); + m[ 6] = hl32_to_64 (w[13], w[12]); + m[ 7] = hl32_to_64 (w[15], w[14]); m[ 8] = 0; m[ 9] = 0; m[10] = 0; @@ -281,7 +139,7 @@ DECLSPEC void m00610s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO h[6] = BLAKE2B_IV_06; h[7] = BLAKE2B_IV_07; - blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); + blake2b_transform_vector (h, m, pw_len, BLAKE2B_FINAL); const u32x r0 = h32_from_64 (h[0]); const u32x r1 = l32_from_64 (h[0]); @@ -531,4 +389,3 @@ KERNEL_FQ void m00610_s16 (KERN_ATTR_VECTOR ()) m00610s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); } -