diff --git a/OpenCL/m00610_a3-optimized.cl b/OpenCL/m00610_a3-optimized.cl index 7402791ef..1ebbffb51 100644 --- a/OpenCL/m00610_a3-optimized.cl +++ b/OpenCL/m00610_a3-optimized.cl @@ -14,12 +14,45 @@ #include M2S(INCLUDE_PATH/inc_hash_blake2b.cl) #endif + DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ()) { /** * modifiers are taken from args */ + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS_HOST].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS_HOST].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS_HOST].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS_HOST].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS_HOST].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS_HOST].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS_HOST].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS_HOST].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS_HOST].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS_HOST].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); + /** * loop */ @@ -31,20 +64,59 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = w0x; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + /** * blake2b */ u64x m[16]; - m[ 0] = hl32_to_64 (w[ 1], w0x ); - m[ 1] = hl32_to_64 (w[ 3], w[ 2]); - m[ 2] = hl32_to_64 (w[ 5], w[ 4]); - m[ 3] = hl32_to_64 (w[ 7], w[ 6]); - m[ 4] = hl32_to_64 (w[ 9], w[ 8]); - m[ 5] = hl32_to_64 (w[11], w[10]); - m[ 6] = hl32_to_64 (w[13], w[12]); - m[ 7] = hl32_to_64 (w[15], w[14]); + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); m[ 8] = 0; m[ 9] = 0; m[10] = 0; @@ -65,7 +137,7 @@ DECLSPEC void m00610m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTO h[6] = BLAKE2B_IV_06; h[7] = BLAKE2B_IV_07; - blake2b_transform_vector (h, m, pw_len, BLAKE2B_FINAL); + blake2b_transform_vector (h, m, pw_salt_len, BLAKE2B_FINAL); const u32x r0 = h32_from_64 (h[0]); const u32x r1 = l32_from_64 (h[0]);