Applied performance changes and fixed multi-mode bad implementation

pull/1237/head
DoZ10 7 years ago
parent f31f057113
commit 8b6120243d

@ -79,7 +79,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[
x[14] = ctx[14];
x[15] = ctx[15];
for (u8 i = 0; i < 10; ++i)
#pragma unroll
for (u8 i = 0; i < 10; i++)
{
/* Column round */
QR(0, 4, 8, 12);
@ -138,7 +139,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[
x[30] = ctx[14];
x[31] = ctx[15];
for (u8 i = 0; i < 10; ++i)
#pragma unroll
for (u8 i = 0; i < 10; i++)
{
/* Column round */
QR(16, 20, 24, 28);
@ -233,16 +235,16 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
u32 position[2] = { 0 };
u32 offset = 0;
position[0] = esalt_bufs->position[0];
position[1] = esalt_bufs->position[1];
position[0] = esalt_bufs[digests_offset].position[0];
position[1] = esalt_bufs[digests_offset].position[1];
offset = esalt_bufs->offset;
offset = esalt_bufs[digests_offset].offset;
iv[0] = esalt_bufs->iv[0];
iv[1] = esalt_bufs->iv[1];
iv[0] = esalt_bufs[digests_offset].iv[0];
iv[1] = esalt_bufs[digests_offset].iv[1];
plain[0] = esalt_bufs->plain[0];
plain[1] = esalt_bufs->plain[1];
plain[0] = esalt_bufs[digests_offset].plain[0];
plain[1] = esalt_bufs[digests_offset].plain[1];
/**
* loop
@ -252,8 +254,6 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
const u32x out_len = apply_rules_vect(pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

@ -79,7 +79,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[
x[14] = ctx[14];
x[15] = ctx[15];
for (u8 i = 0; i < 10; ++i)
#pragma unroll
for (u8 i = 0; i < 10; i++)
{
/* Column round */
QR(0, 4, 8, 12);
@ -138,7 +139,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[
x[30] = ctx[14];
x[31] = ctx[15];
for (u8 i = 0; i < 10; ++i)
#pragma unroll
for (u8 i = 0; i < 10; i++)
{
/* Column round */
QR(16, 20, 24, 28);
@ -233,16 +235,16 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
u32 position[2] = { 0 };
u32 offset = 0;
position[0] = esalt_bufs->position[0];
position[1] = esalt_bufs->position[1];
position[0] = esalt_bufs[digests_offset].position[0];
position[1] = esalt_bufs[digests_offset].position[1];
offset = esalt_bufs->offset;
offset = esalt_bufs[digests_offset].offset;
iv[0] = esalt_bufs->iv[0];
iv[1] = esalt_bufs->iv[1];
iv[0] = esalt_bufs[digests_offset].iv[0];
iv[1] = esalt_bufs[digests_offset].iv[1];
plain[0] = esalt_bufs->plain[0];
plain[1] = esalt_bufs->plain[1];
plain[0] = esalt_bufs[digests_offset].plain[0];
plain[1] = esalt_bufs[digests_offset].plain[1];
/**
* loop
@ -297,8 +299,6 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -308,14 +308,6 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
u32x digest[4] = { 0 };
@ -449,8 +441,6 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -460,14 +450,6 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
w1[1] = wordl1[1] | wordr1[1];
w1[2] = wordl1[2] | wordr1[2];
w1[3] = wordl1[3] | wordr1[3];
w2[0] = wordl2[0] | wordr2[0];
w2[1] = wordl2[1] | wordr2[1];
w2[2] = wordl2[2] | wordr2[2];
w2[3] = wordl2[3] | wordr2[3];
w3[0] = wordl3[0] | wordr3[0];
w3[1] = wordl3[1] | wordr3[1];
w3[2] = wordl3[2] | wordr3[2];
w3[3] = wordl3[3] | wordr3[3];
u32x digest[4] = { 0 };

@ -77,7 +77,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[
x[14] = ctx[14];
x[15] = ctx[15];
for (u8 i = 0; i < 10; ++i)
#pragma unroll
for (u8 i = 0; i < 10; i++)
{
/* Column round */
QR(0, 4, 8, 12);
@ -136,7 +137,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[
x[30] = ctx[14];
x[31] = ctx[15];
for (u8 i = 0; i < 10; ++i)
#pragma unroll
for (u8 i = 0; i < 10; i++)
{
/* Column round */
QR(16, 20, 24, 28);
@ -208,6 +210,22 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
const u32 gid = get_global_id (0);
const u32 lid = get_local_id (0);
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = pws[gid].i[0];
w0[1] = pws[gid].i[1];
w0[2] = pws[gid].i[2];
w0[3] = pws[gid].i[3];
w1[0] = pws[gid].i[4];
w1[1] = pws[gid].i[5];
w1[2] = pws[gid].i[6];
w1[3] = pws[gid].i[7];
u32x out_len = pws[gid].pw_len;
/**
* Salt prep
*/
@ -217,16 +235,16 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
u32 position[2] = { 0 };
u32 offset = 0;
position[0] = esalt_bufs->position[0];
position[1] = esalt_bufs->position[1];
position[0] = esalt_bufs[digests_offset].position[0];
position[1] = esalt_bufs[digests_offset].position[1];
offset = esalt_bufs->offset;
offset = esalt_bufs[digests_offset].offset;
iv[0] = esalt_bufs->iv[0];
iv[1] = esalt_bufs->iv[1];
iv[0] = esalt_bufs[digests_offset].iv[0];
iv[1] = esalt_bufs[digests_offset].iv[1];
plain[0] = esalt_bufs->plain[0];
plain[1] = esalt_bufs->plain[1];
plain[0] = esalt_bufs[digests_offset].plain[0];
plain[1] = esalt_bufs[digests_offset].plain[1];
/**
* loop
@ -239,33 +257,21 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0x = w0l | w0r;
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = w0x;
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = pws[gid].i[14];
w3[3] = pws[gid].i[15];
u32x out_len = pws[gid].pw_len;
u32x w0_t[4];
u32x w1_t[4];
w0_t[0] = w0x;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x digest[4] = { 0 };
chacha20_transform (w0, w1, position, offset, iv, plain, digest);
chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
const u32x r0 = digest[0];
const u32x r1 = digest[1];
@ -293,6 +299,22 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
const u32 gid = get_global_id (0);
const u32 lid = get_local_id (0);
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = pws[gid].i[0];
w0[1] = pws[gid].i[1];
w0[2] = pws[gid].i[2];
w0[3] = pws[gid].i[3];
w1[0] = pws[gid].i[4];
w1[1] = pws[gid].i[5];
w1[2] = pws[gid].i[6];
w1[3] = pws[gid].i[7];
u32x out_len = pws[gid].pw_len;
/**
* Salt prep
*/
@ -336,33 +358,21 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0x = w0l | w0r;
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
w0[0] = w0x;
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = pws[gid].i[10];
w2[3] = pws[gid].i[11];
w3[0] = pws[gid].i[12];
w3[1] = pws[gid].i[13];
w3[2] = pws[gid].i[14];
w3[3] = pws[gid].i[15];
u32x out_len = pws[gid].pw_len;
u32x w0_t[4];
u32x w1_t[4];
w0_t[0] = w0x;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x digest[4] = { 0 };
chacha20_transform (w0, w1, position, offset, iv, plain, digest);
chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
const u32x r0 = digest[0];
const u32x r1 = digest[1];

@ -18562,7 +18562,8 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le
{
u32 *ptr = digest_buf;
const chacha20_t *chacha20 = (const chacha20_t *) esalts_buf;
const chacha20_t *chacha20_tmp = (const chacha20_t *) esalts_buf;
const chacha20_t *chacha20 = &chacha20_tmp[digest_cur];
snprintf (out_buf, out_len - 1, "%s*%08x%08x*%d*%08x%08x*%08x%08x*%08x%08x",
SIGNATURE_CHACHA20,

Loading…
Cancel
Save