diff --git a/OpenCL/m15400_a0.cl b/OpenCL/m15400_a0.cl index 1d7cd50c0..86bfdc3ce 100644 --- a/OpenCL/m15400_a0.cl +++ b/OpenCL/m15400_a0.cl @@ -79,7 +79,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] = ctx[14]; x[15] = ctx[15]; - for (u8 i = 0; i < 10; ++i) + #pragma unroll + for (u8 i = 0; i < 10; i++) { /* Column round */ QR(0, 4, 8, 12); @@ -138,7 +139,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[30] = ctx[14]; x[31] = ctx[15]; - for (u8 i = 0; i < 10; ++i) + #pragma unroll + for (u8 i = 0; i < 10; i++) { /* Column round */ QR(16, 20, 24, 28); @@ -233,16 +235,16 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 position[2] = { 0 }; u32 offset = 0; - position[0] = esalt_bufs->position[0]; - position[1] = esalt_bufs->position[1]; + position[0] = esalt_bufs[digests_offset].position[0]; + position[1] = esalt_bufs[digests_offset].position[1]; - offset = esalt_bufs->offset; + offset = esalt_bufs[digests_offset].offset; - iv[0] = esalt_bufs->iv[0]; - iv[1] = esalt_bufs->iv[1]; + iv[0] = esalt_bufs[digests_offset].iv[0]; + iv[1] = esalt_bufs[digests_offset].iv[1]; - plain[0] = esalt_bufs->plain[0]; - plain[1] = esalt_bufs->plain[1]; + plain[0] = esalt_bufs[digests_offset].plain[0]; + plain[1] = esalt_bufs[digests_offset].plain[1]; /** * loop @@ -252,8 +254,6 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule { u32x w0[4] = { 0 }; u32x w1[4] = { 0 }; - u32x w2[4] = { 0 }; - u32x w3[4] = { 0 }; const u32x out_len = apply_rules_vect(pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); diff --git a/OpenCL/m15400_a1.cl b/OpenCL/m15400_a1.cl index ab7c2f570..42420512c 100644 --- a/OpenCL/m15400_a1.cl +++ b/OpenCL/m15400_a1.cl @@ -79,7 +79,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] = ctx[14]; x[15] = ctx[15]; - for (u8 i = 0; i < 10; ++i) + #pragma unroll + for (u8 i = 0; i < 10; i++) { /* Column round */ QR(0, 4, 8, 12); @@ -138,7 +139,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[30] = ctx[14]; x[31] = ctx[15]; - for (u8 i = 0; i < 10; ++i) + #pragma unroll + for (u8 i = 0; i < 10; i++) { /* Column round */ QR(16, 20, 24, 28); @@ -233,16 +235,16 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 position[2] = { 0 }; u32 offset = 0; - position[0] = esalt_bufs->position[0]; - position[1] = esalt_bufs->position[1]; + position[0] = esalt_bufs[digests_offset].position[0]; + position[1] = esalt_bufs[digests_offset].position[1]; - offset = esalt_bufs->offset; + offset = esalt_bufs[digests_offset].offset; - iv[0] = esalt_bufs->iv[0]; - iv[1] = esalt_bufs->iv[1]; + iv[0] = esalt_bufs[digests_offset].iv[0]; + iv[1] = esalt_bufs[digests_offset].iv[1]; - plain[0] = esalt_bufs->plain[0]; - plain[1] = esalt_bufs->plain[1]; + plain[0] = esalt_bufs[digests_offset].plain[0]; + plain[1] = esalt_bufs[digests_offset].plain[1]; /** * loop @@ -297,8 +299,6 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x w0[4]; u32x w1[4]; - u32x w2[4]; - u32x w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -308,14 +308,6 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; u32x digest[4] = { 0 }; @@ -449,8 +441,6 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x w0[4]; u32x w1[4]; - u32x w2[4]; - u32x w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -460,14 +450,6 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; u32x digest[4] = { 0 }; diff --git a/OpenCL/m15400_a3.cl b/OpenCL/m15400_a3.cl index 3ef55c615..c13dc052c 100644 --- a/OpenCL/m15400_a3.cl +++ b/OpenCL/m15400_a3.cl @@ -77,7 +77,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] = ctx[14]; x[15] = ctx[15]; - for (u8 i = 0; i < 10; ++i) + #pragma unroll + for (u8 i = 0; i < 10; i++) { /* Column round */ QR(0, 4, 8, 12); @@ -136,7 +137,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[30] = ctx[14]; x[31] = ctx[15]; - for (u8 i = 0; i < 10; ++i) + #pragma unroll + for (u8 i = 0; i < 10; i++) { /* Column round */ QR(16, 20, 24, 28); @@ -208,6 +210,22 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + w1[0] = pws[gid].i[4]; + w1[1] = pws[gid].i[5]; + w1[2] = pws[gid].i[6]; + w1[3] = pws[gid].i[7]; + + u32x out_len = pws[gid].pw_len; + /** * Salt prep */ @@ -217,16 +235,16 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 position[2] = { 0 }; u32 offset = 0; - position[0] = esalt_bufs->position[0]; - position[1] = esalt_bufs->position[1]; + position[0] = esalt_bufs[digests_offset].position[0]; + position[1] = esalt_bufs[digests_offset].position[1]; - offset = esalt_bufs->offset; + offset = esalt_bufs[digests_offset].offset; - iv[0] = esalt_bufs->iv[0]; - iv[1] = esalt_bufs->iv[1]; + iv[0] = esalt_bufs[digests_offset].iv[0]; + iv[1] = esalt_bufs[digests_offset].iv[1]; - plain[0] = esalt_bufs->plain[0]; - plain[1] = esalt_bufs->plain[1]; + plain[0] = esalt_bufs[digests_offset].plain[0]; + plain[1] = esalt_bufs[digests_offset].plain[1]; /** * loop @@ -239,33 +257,21 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32x w0_t[4]; + u32x w1_t[4]; - w0[0] = w0x; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - u32x out_len = pws[gid].pw_len; + w0_t[0] = w0x; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; u32x digest[4] = { 0 }; - chacha20_transform (w0, w1, position, offset, iv, plain, digest); + chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest); const u32x r0 = digest[0]; const u32x r1 = digest[1]; @@ -293,6 +299,22 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + w1[0] = pws[gid].i[4]; + w1[1] = pws[gid].i[5]; + w1[2] = pws[gid].i[6]; + w1[3] = pws[gid].i[7]; + + u32x out_len = pws[gid].pw_len; + /** * Salt prep */ @@ -336,33 +358,21 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32x w0_t[4]; + u32x w1_t[4]; - w0[0] = w0x; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - u32x out_len = pws[gid].pw_len; + w0_t[0] = w0x; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; u32x digest[4] = { 0 }; - chacha20_transform (w0, w1, position, offset, iv, plain, digest); + chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest); const u32x r0 = digest[0]; const u32x r1 = digest[1]; diff --git a/src/interface.c b/src/interface.c index 9ee2a3983..1b40c395d 100644 --- a/src/interface.c +++ b/src/interface.c @@ -18562,7 +18562,8 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le { u32 *ptr = digest_buf; - const chacha20_t *chacha20 = (const chacha20_t *) esalts_buf; + const chacha20_t *chacha20_tmp = (const chacha20_t *) esalts_buf; + const chacha20_t *chacha20 = &chacha20_tmp[digest_cur]; snprintf (out_buf, out_len - 1, "%s*%08x%08x*%d*%08x%08x*%08x%08x*%08x%08x", SIGNATURE_CHACHA20,