mirror of
https://github.com/hashcat/hashcat.git
synced 2025-05-25 10:18:47 +00:00
Zero pws_buf before reuse
This commit is contained in:
parent
6c10ca5853
commit
b409e5e9e1
@ -59,18 +59,26 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -91,33 +99,27 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* append salt
|
||||
*/
|
||||
|
||||
u32x s0[4];
|
||||
u32x s0[4] = { 0 };
|
||||
u32x s1[4] = { 0 };
|
||||
u32x s2[4] = { 0 };
|
||||
u32x s3[4] = { 0 };
|
||||
|
||||
s0[0] = salt_buf0[0];
|
||||
s0[1] = salt_buf0[1];
|
||||
s0[2] = salt_buf0[2];
|
||||
s0[3] = salt_buf0[3];
|
||||
|
||||
u32x s1[4];
|
||||
|
||||
s1[0] = salt_buf1[0];
|
||||
s1[1] = salt_buf1[1];
|
||||
s1[2] = salt_buf1[2];
|
||||
s1[3] = salt_buf1[3];
|
||||
|
||||
u32x s2[4];
|
||||
|
||||
s2[0] = 0;
|
||||
s2[1] = 0;
|
||||
s2[2] = 0;
|
||||
s2[3] = 0;
|
||||
|
||||
u32x s3[4];
|
||||
|
||||
s3[0] = 0;
|
||||
s3[1] = 0;
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
s2[0] = salt_buf2[0];
|
||||
s2[1] = salt_buf2[1];
|
||||
s2[2] = salt_buf2[2];
|
||||
s2[3] = salt_buf2[3];
|
||||
s3[0] = salt_buf3[0];
|
||||
s3[1] = salt_buf3[1];
|
||||
s3[2] = salt_buf3[2];
|
||||
s3[3] = salt_buf3[3];
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
|
||||
|
||||
@ -127,24 +129,19 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w0[1] |= s0[1];
|
||||
w0[2] |= s0[2];
|
||||
w0[3] |= s0[3];
|
||||
|
||||
w1[0] |= s1[0];
|
||||
w1[1] |= s1[1];
|
||||
w1[2] |= s1[2];
|
||||
w1[3] |= s1[3];
|
||||
|
||||
w2[0] |= s2[0];
|
||||
w2[1] |= s2[1];
|
||||
w2[2] |= s2[2];
|
||||
w2[3] |= s2[3];
|
||||
|
||||
w3[0] |= s3[0];
|
||||
w3[1] |= s3[1];
|
||||
w3[2] = pw_salt_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
append_0x80_4x4 (w0, w1, w2, w3, pw_salt_len);
|
||||
|
||||
/**
|
||||
* md5
|
||||
*/
|
||||
@ -271,18 +268,26 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -315,33 +320,27 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* append salt
|
||||
*/
|
||||
|
||||
u32x s0[4];
|
||||
u32x s0[4] = { 0 };
|
||||
u32x s1[4] = { 0 };
|
||||
u32x s2[4] = { 0 };
|
||||
u32x s3[4] = { 0 };
|
||||
|
||||
s0[0] = salt_buf0[0];
|
||||
s0[1] = salt_buf0[1];
|
||||
s0[2] = salt_buf0[2];
|
||||
s0[3] = salt_buf0[3];
|
||||
|
||||
u32x s1[4];
|
||||
|
||||
s1[0] = salt_buf1[0];
|
||||
s1[1] = salt_buf1[1];
|
||||
s1[2] = salt_buf1[2];
|
||||
s1[3] = salt_buf1[3];
|
||||
|
||||
u32x s2[4];
|
||||
|
||||
s2[0] = 0;
|
||||
s2[1] = 0;
|
||||
s2[2] = 0;
|
||||
s2[3] = 0;
|
||||
|
||||
u32x s3[4];
|
||||
|
||||
s3[0] = 0;
|
||||
s3[1] = 0;
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
s2[0] = salt_buf2[0];
|
||||
s2[1] = salt_buf2[1];
|
||||
s2[2] = salt_buf2[2];
|
||||
s2[3] = salt_buf2[3];
|
||||
s3[0] = salt_buf3[0];
|
||||
s3[1] = salt_buf3[1];
|
||||
s3[2] = salt_buf3[2];
|
||||
s3[3] = salt_buf3[3];
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, out_len);
|
||||
|
||||
@ -351,24 +350,19 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w0[1] |= s0[1];
|
||||
w0[2] |= s0[2];
|
||||
w0[3] |= s0[3];
|
||||
|
||||
w1[0] |= s1[0];
|
||||
w1[1] |= s1[1];
|
||||
w1[2] |= s1[2];
|
||||
w1[3] |= s1[3];
|
||||
|
||||
w2[0] |= s2[0];
|
||||
w2[1] |= s2[1];
|
||||
w2[2] |= s2[2];
|
||||
w2[3] |= s2[3];
|
||||
|
||||
w3[0] |= s3[0];
|
||||
w3[1] |= s3[1];
|
||||
w3[2] = pw_salt_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
append_0x80_4x4 (w0, w1, w2, w3, pw_salt_len);
|
||||
|
||||
/**
|
||||
* md5
|
||||
*/
|
||||
|
@ -56,15 +56,25 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -138,8 +148,8 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* append salt
|
||||
@ -158,6 +168,14 @@ __kernel void m00010_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s1[1] = salt_buf1[1];
|
||||
s1[2] = salt_buf1[2];
|
||||
s1[3] = salt_buf1[3];
|
||||
s2[0] = salt_buf2[0];
|
||||
s2[1] = salt_buf2[1];
|
||||
s2[2] = salt_buf2[2];
|
||||
s2[3] = salt_buf2[3];
|
||||
s3[0] = salt_buf3[0];
|
||||
s3[1] = salt_buf3[1];
|
||||
s3[2] = salt_buf3[2];
|
||||
s3[3] = salt_buf3[3];
|
||||
|
||||
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
|
||||
|
||||
@ -305,15 +323,25 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -399,8 +427,8 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
/**
|
||||
* append salt
|
||||
@ -419,6 +447,14 @@ __kernel void m00010_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s1[1] = salt_buf1[1];
|
||||
s1[2] = salt_buf1[2];
|
||||
s1[3] = salt_buf1[3];
|
||||
s2[0] = salt_buf2[0];
|
||||
s2[1] = salt_buf2[1];
|
||||
s2[2] = salt_buf2[2];
|
||||
s2[3] = salt_buf2[3];
|
||||
s3[0] = salt_buf3[0];
|
||||
s3[1] = salt_buf3[1];
|
||||
s3[2] = salt_buf3[2];
|
||||
s3[3] = salt_buf3[3];
|
||||
|
||||
switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
|
||||
|
||||
|
@ -51,32 +51,28 @@ static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = 0;
|
||||
salt_buf2[2] = 0;
|
||||
salt_buf2[3] = 0;
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = 0;
|
||||
salt_buf3[1] = 0;
|
||||
salt_buf3[2] = 0;
|
||||
salt_buf3[3] = 0;
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
|
||||
|
||||
@ -97,11 +93,10 @@ static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
|
||||
w[14] |= salt_buf3[2];
|
||||
w[15] |= salt_buf3[3];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
|
||||
w[14] = pw_salt_len * 8;
|
||||
w[15] = 0;
|
||||
|
||||
/**
|
||||
* base
|
||||
|
@ -59,18 +59,26 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -125,10 +133,19 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1_t[1] |= salt_buf1[1];
|
||||
w1_t[2] |= salt_buf1[2];
|
||||
w1_t[3] |= salt_buf1[3];
|
||||
w2_t[0] |= salt_buf2[0];
|
||||
w2_t[1] |= salt_buf2[1];
|
||||
w2_t[2] |= salt_buf2[2];
|
||||
w2_t[3] |= salt_buf2[3];
|
||||
w3_t[0] |= salt_buf3[0];
|
||||
w3_t[1] |= salt_buf3[1];
|
||||
w3_t[2] |= salt_buf3[2];
|
||||
w3_t[3] |= salt_buf3[3];
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
|
||||
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[3] = 0;
|
||||
|
||||
/**
|
||||
* md5
|
||||
@ -256,18 +273,26 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -334,10 +359,19 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1_t[1] |= salt_buf1[1];
|
||||
w1_t[2] |= salt_buf1[2];
|
||||
w1_t[3] |= salt_buf1[3];
|
||||
w2_t[0] |= salt_buf2[0];
|
||||
w2_t[1] |= salt_buf2[1];
|
||||
w2_t[2] |= salt_buf2[2];
|
||||
w2_t[3] |= salt_buf2[3];
|
||||
w3_t[0] |= salt_buf3[0];
|
||||
w3_t[1] |= salt_buf3[1];
|
||||
w3_t[2] |= salt_buf3[2];
|
||||
w3_t[3] |= salt_buf3[3];
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
|
||||
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[3] = 0;
|
||||
|
||||
/**
|
||||
* md5
|
||||
|
@ -56,15 +56,25 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -157,12 +167,12 @@ __kernel void m00020_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[1] |= salt_buf1[1];
|
||||
w1[2] |= salt_buf1[2];
|
||||
w1[3] |= salt_buf1[3];
|
||||
w2[0] |= 0;
|
||||
w2[1] |= 0;
|
||||
w2[2] |= 0;
|
||||
w2[3] |= 0;
|
||||
w3[0] |= 0;
|
||||
w3[1] |= 0;
|
||||
w2[0] |= salt_buf2[0];
|
||||
w2[1] |= salt_buf2[1];
|
||||
w2[2] |= salt_buf2[2];
|
||||
w2[3] |= salt_buf2[3];
|
||||
w3[0] |= salt_buf3[0];
|
||||
w3[1] |= salt_buf3[1];
|
||||
w3[2] = pw_salt_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
@ -290,15 +300,25 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -403,12 +423,12 @@ __kernel void m00020_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[1] |= salt_buf1[1];
|
||||
w1[2] |= salt_buf1[2];
|
||||
w1[3] |= salt_buf1[3];
|
||||
w2[0] |= 0;
|
||||
w2[1] |= 0;
|
||||
w2[2] |= 0;
|
||||
w2[3] |= 0;
|
||||
w3[0] |= 0;
|
||||
w3[1] |= 0;
|
||||
w2[0] |= salt_buf2[0];
|
||||
w2[1] |= salt_buf2[1];
|
||||
w2[2] |= salt_buf2[2];
|
||||
w2[3] |= salt_buf2[3];
|
||||
w3[0] |= salt_buf3[0];
|
||||
w3[1] |= salt_buf3[1];
|
||||
w3[2] = pw_salt_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include "OpenCL/common.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
static void m00020m (u32 t0[4], u32 t1[4], u32 t2[4], u32 t3[4], const u32 pw_len, __global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset)
|
||||
{
|
||||
/**
|
||||
* modifier
|
||||
@ -34,32 +34,26 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = 0;
|
||||
salt_buf2[1] = 0;
|
||||
salt_buf2[2] = 0;
|
||||
salt_buf2[3] = 0;
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = 0;
|
||||
salt_buf3[1] = 0;
|
||||
salt_buf3[2] = 0;
|
||||
salt_buf3[3] = 0;
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -74,22 +68,22 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
w0_t[2] = w0[2];
|
||||
w0_t[3] = w0[3];
|
||||
w1_t[0] = w1[0];
|
||||
w1_t[1] = w1[1];
|
||||
w1_t[2] = w1[2];
|
||||
w1_t[3] = w1[3];
|
||||
w2_t[0] = w2[0];
|
||||
w2_t[1] = w2[1];
|
||||
w2_t[2] = w2[2];
|
||||
w2_t[3] = w2[3];
|
||||
w3_t[0] = w3[0];
|
||||
w3_t[1] = w3[1];
|
||||
w3_t[2] = w3[2];
|
||||
w3_t[3] = w3[3];
|
||||
w0_t[0] = t0[0];
|
||||
w0_t[1] = t0[1];
|
||||
w0_t[2] = t0[2];
|
||||
w0_t[3] = t0[3];
|
||||
w1_t[0] = t1[0];
|
||||
w1_t[1] = t1[1];
|
||||
w1_t[2] = t1[2];
|
||||
w1_t[3] = t1[3];
|
||||
w2_t[0] = t2[0];
|
||||
w2_t[1] = t2[1];
|
||||
w2_t[2] = t2[2];
|
||||
w2_t[3] = t2[3];
|
||||
w3_t[0] = t3[0];
|
||||
w3_t[1] = t3[1];
|
||||
w3_t[2] = t3[2];
|
||||
w3_t[3] = t3[3];
|
||||
|
||||
switch_buffer_by_offset_le_S (w0_t, w1_t, w2_t, w3_t, salt_len);
|
||||
|
||||
@ -114,7 +108,7 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
* loop
|
||||
*/
|
||||
|
||||
u32 w0l = w0[0];
|
||||
u32 w0l = t0[0];
|
||||
|
||||
for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
@ -143,27 +137,29 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
|
||||
overwrite_at_le (wx, w0lr, salt_len);
|
||||
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0_t[0] = wx[ 0];
|
||||
w0_t[1] = wx[ 1];
|
||||
w0_t[2] = wx[ 2];
|
||||
w0_t[3] = wx[ 3];
|
||||
w1_t[0] = wx[ 4];
|
||||
w1_t[1] = wx[ 5];
|
||||
w1_t[2] = wx[ 6];
|
||||
w1_t[3] = wx[ 7];
|
||||
w2_t[0] = wx[ 8];
|
||||
w2_t[1] = wx[ 9];
|
||||
w2_t[2] = wx[10];
|
||||
w2_t[3] = wx[11];
|
||||
w3_t[0] = wx[12];
|
||||
w3_t[1] = wx[13];
|
||||
w3_t[2] = pw_salt_len * 8;
|
||||
w3_t[3] = 0;
|
||||
w0[0] = wx[ 0];
|
||||
w0[1] = wx[ 1];
|
||||
w0[2] = wx[ 2];
|
||||
w0[3] = wx[ 3];
|
||||
w1[0] = wx[ 4];
|
||||
w1[1] = wx[ 5];
|
||||
w1[2] = wx[ 6];
|
||||
w1[3] = wx[ 7];
|
||||
w2[0] = wx[ 8];
|
||||
w2[1] = wx[ 9];
|
||||
w2[2] = wx[10];
|
||||
w2[3] = wx[11];
|
||||
w3[0] = wx[12];
|
||||
w3[1] = wx[13];
|
||||
w3[2] = pw_salt_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
append_0x80_4x4 (w0, w1, w2, w3, pw_salt_len);
|
||||
|
||||
/**
|
||||
* md5
|
||||
@ -174,73 +170,73 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03);
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03);
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03);
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03);
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03);
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03);
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03);
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01);
|
||||
MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02);
|
||||
MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03);
|
||||
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13);
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13);
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13);
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13);
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13);
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13);
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13);
|
||||
MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10);
|
||||
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11);
|
||||
MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12);
|
||||
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13);
|
||||
|
||||
MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23);
|
||||
MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23);
|
||||
MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23);
|
||||
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23);
|
||||
MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23);
|
||||
MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23);
|
||||
MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23);
|
||||
MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20);
|
||||
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21);
|
||||
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22);
|
||||
MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23);
|
||||
|
||||
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
|
||||
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
@ -272,32 +268,26 @@ static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = 0;
|
||||
salt_buf2[1] = 0;
|
||||
salt_buf2[2] = 0;
|
||||
salt_buf2[3] = 0;
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = 0;
|
||||
salt_buf3[1] = 0;
|
||||
salt_buf3[2] = 0;
|
||||
salt_buf3[3] = 0;
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
|
@ -61,18 +61,26 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -147,12 +155,8 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2_t[3] |= s2[3];
|
||||
w3_t[0] |= s3[0];
|
||||
w3_t[1] |= s3[1];
|
||||
w3_t[2] |= s3[2];
|
||||
w3_t[3] |= s3[3];
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
|
||||
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[3] = 0;
|
||||
|
||||
/**
|
||||
* md5
|
||||
@ -280,18 +284,26 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -378,12 +390,8 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2_t[3] |= s2[3];
|
||||
w3_t[0] |= s3[0];
|
||||
w3_t[1] |= s3[1];
|
||||
w3_t[2] |= s3[2];
|
||||
w3_t[3] |= s3[3];
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
|
||||
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[3] = 0;
|
||||
|
||||
/**
|
||||
* md5
|
||||
|
@ -58,15 +58,25 @@ __kernel void m00030_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -312,15 +322,25 @@ __kernel void m00030_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
|
@ -51,32 +51,28 @@ static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = 0;
|
||||
salt_buf2[2] = 0;
|
||||
salt_buf2[3] = 0;
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = 0;
|
||||
salt_buf3[1] = 0;
|
||||
salt_buf3[2] = 0;
|
||||
salt_buf3[3] = 0;
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
|
||||
|
||||
@ -97,11 +93,10 @@ static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global k
|
||||
w[14] |= salt_buf3[2];
|
||||
w[15] |= salt_buf3[3];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
|
||||
w[14] = pw_salt_len * 8;
|
||||
w[15] = 0;
|
||||
|
||||
/**
|
||||
* base
|
||||
|
@ -59,18 +59,26 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -111,10 +119,19 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1_t[1] |= salt_buf1[1];
|
||||
w1_t[2] |= salt_buf1[2];
|
||||
w1_t[3] |= salt_buf1[3];
|
||||
w2_t[0] |= salt_buf2[0];
|
||||
w2_t[1] |= salt_buf2[1];
|
||||
w2_t[2] |= salt_buf2[2];
|
||||
w2_t[3] |= salt_buf2[3];
|
||||
w3_t[0] |= salt_buf3[0];
|
||||
w3_t[1] |= salt_buf3[1];
|
||||
w3_t[2] |= salt_buf3[2];
|
||||
w3_t[3] |= salt_buf3[3];
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
|
||||
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[3] = 0;
|
||||
|
||||
/**
|
||||
* md5
|
||||
@ -242,18 +259,26 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -306,10 +331,19 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1_t[1] |= salt_buf1[1];
|
||||
w1_t[2] |= salt_buf1[2];
|
||||
w1_t[3] |= salt_buf1[3];
|
||||
w2_t[0] |= salt_buf2[0];
|
||||
w2_t[1] |= salt_buf2[1];
|
||||
w2_t[2] |= salt_buf2[2];
|
||||
w2_t[3] |= salt_buf2[3];
|
||||
w3_t[0] |= salt_buf3[0];
|
||||
w3_t[1] |= salt_buf3[1];
|
||||
w3_t[2] |= salt_buf3[2];
|
||||
w3_t[3] |= salt_buf3[3];
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, out_salt_len);
|
||||
|
||||
w3_t[2] = out_salt_len * 8;
|
||||
w3_t[3] = 0;
|
||||
|
||||
/**
|
||||
* md5
|
||||
|
@ -58,15 +58,25 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -164,12 +174,12 @@ __kernel void m00040_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[1] |= salt_buf1[1];
|
||||
w1[2] |= salt_buf1[2];
|
||||
w1[3] |= salt_buf1[3];
|
||||
w2[0] |= 0;
|
||||
w2[1] |= 0;
|
||||
w2[2] |= 0;
|
||||
w2[3] |= 0;
|
||||
w3[0] |= 0;
|
||||
w3[1] |= 0;
|
||||
w2[0] |= salt_buf2[0];
|
||||
w2[1] |= salt_buf2[1];
|
||||
w2[2] |= salt_buf2[2];
|
||||
w2[3] |= salt_buf2[3];
|
||||
w3[0] |= salt_buf3[0];
|
||||
w3[1] |= salt_buf3[1];
|
||||
w3[2] = pw_salt_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
@ -298,15 +308,25 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7];
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -416,12 +436,12 @@ __kernel void m00040_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[1] |= salt_buf1[1];
|
||||
w1[2] |= salt_buf1[2];
|
||||
w1[3] |= salt_buf1[3];
|
||||
w2[0] |= 0;
|
||||
w2[1] |= 0;
|
||||
w2[2] |= 0;
|
||||
w2[3] |= 0;
|
||||
w3[0] |= 0;
|
||||
w3[1] |= 0;
|
||||
w2[0] |= salt_buf2[0];
|
||||
w2[1] |= salt_buf2[1];
|
||||
w2[2] |= salt_buf2[2];
|
||||
w2[3] |= salt_buf2[3];
|
||||
w3[0] |= salt_buf3[0];
|
||||
w3[1] |= salt_buf3[1];
|
||||
w3[2] = pw_salt_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
|
@ -34,32 +34,26 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = 0;
|
||||
salt_buf2[1] = 0;
|
||||
salt_buf2[2] = 0;
|
||||
salt_buf2[3] = 0;
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = 0;
|
||||
salt_buf3[1] = 0;
|
||||
salt_buf3[2] = 0;
|
||||
salt_buf3[3] = 0;
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
@ -272,32 +266,26 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = 0;
|
||||
salt_buf2[1] = 0;
|
||||
salt_buf2[2] = 0;
|
||||
salt_buf2[3] = 0;
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = 0;
|
||||
salt_buf3[1] = 0;
|
||||
salt_buf3[2] = 0;
|
||||
salt_buf3[3] = 0;
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
|
@ -242,28 +242,22 @@ __kernel void m00050_m04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
@ -391,28 +385,22 @@ __kernel void m00050_s04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
|
@ -217,28 +217,22 @@ static void m00050m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
@ -334,28 +328,22 @@ static void m00050s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
|
||||
u32 salt_buf2[4];
|
||||
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
|
@ -242,50 +242,54 @@ __kernel void m00060_m04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
/**
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
w0_t[2] = salt_buf0[2];
|
||||
w0_t[3] = salt_buf0[3];
|
||||
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = salt_buf1[0];
|
||||
w1_t[1] = salt_buf1[1];
|
||||
w1_t[2] = salt_buf1[2];
|
||||
w1_t[3] = salt_buf1[3];
|
||||
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
w3_t[2] = 0;
|
||||
w3_t[3] = 0;
|
||||
w2_t[0] = salt_buf2[0];
|
||||
w2_t[1] = salt_buf2[1];
|
||||
w2_t[2] = salt_buf2[2];
|
||||
w2_t[3] = salt_buf2[3];
|
||||
w3_t[0] = salt_buf3[0];
|
||||
w3_t[1] = salt_buf3[1];
|
||||
w3_t[2] = salt_buf3[2];
|
||||
w3_t[3] = salt_buf3[3];
|
||||
|
||||
u32x ipad[4];
|
||||
u32x opad[4];
|
||||
@ -377,50 +381,54 @@ __kernel void m00060_s04 (__global pw_t *pws, __global kernel_rule_t * rules_bu
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
/**
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
w0_t[2] = salt_buf0[2];
|
||||
w0_t[3] = salt_buf0[3];
|
||||
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = salt_buf1[0];
|
||||
w1_t[1] = salt_buf1[1];
|
||||
w1_t[2] = salt_buf1[2];
|
||||
w1_t[3] = salt_buf1[3];
|
||||
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
w3_t[2] = 0;
|
||||
w3_t[3] = 0;
|
||||
w2_t[0] = salt_buf2[0];
|
||||
w2_t[1] = salt_buf2[1];
|
||||
w2_t[2] = salt_buf2[2];
|
||||
w2_t[3] = salt_buf2[3];
|
||||
w3_t[0] = salt_buf3[0];
|
||||
w3_t[1] = salt_buf3[1];
|
||||
w3_t[2] = salt_buf3[2];
|
||||
w3_t[3] = salt_buf3[3];
|
||||
|
||||
u32x ipad[4];
|
||||
u32x opad[4];
|
||||
|
@ -235,32 +235,26 @@ static void m00060m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
w0_t[2] = salt_buf0[2];
|
||||
w0_t[3] = salt_buf0[3];
|
||||
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = salt_buf1[0];
|
||||
w1_t[1] = salt_buf1[1];
|
||||
w1_t[2] = salt_buf1[2];
|
||||
w1_t[3] = salt_buf1[3];
|
||||
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
w3_t[2] = 0;
|
||||
w3_t[3] = 0;
|
||||
w2_t[0] = salt_buf2[0];
|
||||
w2_t[1] = salt_buf2[1];
|
||||
w2_t[2] = salt_buf2[2];
|
||||
w2_t[3] = salt_buf2[3];
|
||||
w3_t[0] = salt_buf3[0];
|
||||
w3_t[1] = salt_buf3[1];
|
||||
w3_t[2] = salt_buf3[2];
|
||||
w3_t[3] = salt_buf3[3];
|
||||
|
||||
u32x ipad[4];
|
||||
u32x opad[4];
|
||||
@ -320,50 +314,54 @@ static void m00060s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
u32 salt_buf2[4];
|
||||
u32 salt_buf3[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6];
|
||||
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7];
|
||||
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8];
|
||||
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9];
|
||||
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10];
|
||||
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11];
|
||||
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12];
|
||||
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13];
|
||||
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14];
|
||||
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15];
|
||||
|
||||
const u32 salt_len = salt_bufs[salt_pos].salt_len;
|
||||
|
||||
/**
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
w0_t[2] = salt_buf0[2];
|
||||
w0_t[3] = salt_buf0[3];
|
||||
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = salt_buf1[0];
|
||||
w1_t[1] = salt_buf1[1];
|
||||
w1_t[2] = salt_buf1[2];
|
||||
w1_t[3] = salt_buf1[3];
|
||||
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
w3_t[2] = 0;
|
||||
w3_t[3] = 0;
|
||||
w2_t[0] = salt_buf2[0];
|
||||
w2_t[1] = salt_buf2[1];
|
||||
w2_t[2] = salt_buf2[2];
|
||||
w2_t[3] = salt_buf2[3];
|
||||
w3_t[0] = salt_buf3[0];
|
||||
w3_t[1] = salt_buf3[1];
|
||||
w3_t[2] = salt_buf3[2];
|
||||
w3_t[3] = salt_buf3[3];
|
||||
|
||||
u32x ipad[4];
|
||||
u32x opad[4];
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,54 +36,31 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -95,39 +72,25 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -166,32 +129,32 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2] | s0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3] | s0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0] | s1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1] | s1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2] | s1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3] | s1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0] | s2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1] | s2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2] | s2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3] | s2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0] | s3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1] | s3[1];
|
||||
@ -204,28 +167,28 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -323,13 +286,7 @@ __kernel void m00110_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -357,54 +314,31 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -428,45 +362,31 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -505,32 +425,32 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2] | s0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3] | s0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0] | s1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1] | s1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2] | s1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3] | s1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0] | s2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1] | s2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2] | s2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3] | s2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0] | s3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1] | s3[1];
|
||||
@ -543,28 +463,28 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -658,20 +578,14 @@ __kernel void m00110_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
|
||||
wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
|
||||
wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,54 +36,31 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -95,43 +72,35 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -154,12 +123,12 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
@ -214,11 +183,11 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -316,13 +285,7 @@ __kernel void m00120_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -350,54 +313,31 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -421,49 +361,41 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -486,12 +418,12 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
@ -546,11 +478,11 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -645,19 +577,13 @@ __kernel void m00120_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
|
||||
w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
// no unicode yet
|
||||
|
||||
@ -38,54 +38,31 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -97,39 +74,25 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -172,10 +135,10 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -194,10 +157,10 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -244,11 +207,11 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -346,13 +309,7 @@ __kernel void m00130_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -380,54 +337,31 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -451,45 +385,31 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -532,10 +452,10 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -554,10 +474,10 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -604,11 +524,11 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -703,19 +623,13 @@ __kernel void m00130_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
|
||||
w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
// no unicode yet
|
||||
|
||||
@ -38,54 +38,31 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -97,39 +74,25 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -142,10 +105,10 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -164,10 +127,10 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -208,11 +171,11 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -310,13 +273,7 @@ __kernel void m00140_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -344,54 +301,31 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -415,45 +349,31 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -466,10 +386,10 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -488,10 +408,10 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -532,11 +452,11 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -631,19 +551,13 @@ __kernel void m00140_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
|
||||
w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
|
||||
{
|
||||
@ -253,41 +253,20 @@ __kernel void m00150_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -312,67 +291,53 @@ __kernel void m00150_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -383,28 +348,28 @@ __kernel void m00150_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -437,12 +402,7 @@ __kernel void m00150_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -470,41 +430,20 @@ __kernel void m00150_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -541,67 +480,53 @@ __kernel void m00150_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -612,28 +537,28 @@ __kernel void m00150_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -666,12 +591,7 @@ __kernel void m00150_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
|
||||
{
|
||||
@ -253,41 +253,20 @@ __kernel void m00160_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -347,67 +326,53 @@ __kernel void m00160_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -437,12 +402,7 @@ __kernel void m00160_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -470,41 +430,20 @@ __kernel void m00160_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -576,67 +515,53 @@ __kernel void m00160_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -666,12 +591,7 @@ __kernel void m00160_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,110 +36,71 @@ __kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -150,28 +111,28 @@ __kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -274,25 +235,11 @@ __kernel void m00190_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
d += SHA1M_D;
|
||||
c += SHA1M_C;
|
||||
|
||||
{
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = d;
|
||||
const u32 r3 = c;
|
||||
|
||||
#include COMPARE_M
|
||||
}
|
||||
COMPARE_M_SIMD (a, e, d, c);
|
||||
|
||||
a &= 0x00000fff;
|
||||
|
||||
{
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = d;
|
||||
const u32 r3 = c;
|
||||
|
||||
#include COMPARE_M
|
||||
}
|
||||
COMPARE_M_SIMD (a, e, d, c);
|
||||
}
|
||||
}
|
||||
|
||||
@ -320,43 +267,20 @@ __kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -373,69 +297,53 @@ __kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -446,28 +354,28 @@ __kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -570,25 +478,11 @@ __kernel void m00190_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
d += SHA1M_D;
|
||||
c += SHA1M_C;
|
||||
|
||||
{
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = d;
|
||||
const u32 r3 = c;
|
||||
|
||||
#include COMPARE_S
|
||||
}
|
||||
COMPARE_S_SIMD (a, e, d, c);
|
||||
|
||||
a &= 0x00000fff;
|
||||
|
||||
{
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = d;
|
||||
const u32 r3 = c;
|
||||
|
||||
#include COMPARE_S
|
||||
}
|
||||
COMPARE_S_SIMD (a, e, d, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MYSQL323_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,85 +36,50 @@ __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w_t[16];
|
||||
u32x w_t[16];
|
||||
|
||||
w_t[ 0] = wordl0[0] | wordr0[0];
|
||||
w_t[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -133,8 +98,10 @@ __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w_t[14] = wordl3[2] | wordr3[2];
|
||||
w_t[15] = 0;
|
||||
|
||||
u32 a = MYSQL323_A;
|
||||
u32 b = MYSQL323_B;
|
||||
u32x a = MYSQL323_A;
|
||||
u32x b = MYSQL323_B;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
u32 add = 7;
|
||||
|
||||
@ -181,12 +148,7 @@ __kernel void m00200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
a &= 0x7fffffff;
|
||||
b &= 0x7fffffff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, b, c, d);
|
||||
}
|
||||
}
|
||||
|
||||
@ -214,41 +176,20 @@ __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -265,46 +206,32 @@ __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w_t[16];
|
||||
u32x w_t[16];
|
||||
|
||||
w_t[ 0] = wordl0[0] | wordr0[0];
|
||||
w_t[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -323,8 +250,10 @@ __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w_t[14] = wordl3[2] | wordr3[2];
|
||||
w_t[15] = 0;
|
||||
|
||||
u32 a = MYSQL323_A;
|
||||
u32 b = MYSQL323_B;
|
||||
u32x a = MYSQL323_A;
|
||||
u32x b = MYSQL323_B;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
u32 add = 7;
|
||||
|
||||
@ -371,12 +300,7 @@ __kernel void m00200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
a &= 0x7fffffff;
|
||||
b &= 0x7fffffff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, b, c, d);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,110 +36,71 @@ __kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -150,28 +111,28 @@ __kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -394,13 +355,7 @@ __kernel void m00300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -428,43 +383,20 @@ __kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -481,75 +413,59 @@ __kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -560,28 +476,28 @@ __kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -800,20 +716,14 @@ __kernel void m00300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
|
||||
wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
|
||||
wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, e, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD4_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,118 +36,81 @@ __kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
|
||||
@ -199,13 +162,7 @@ __kernel void m00900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
|
||||
MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
|
||||
MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -233,43 +190,19 @@ __kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -286,77 +219,63 @@ __kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
|
||||
@ -408,13 +327,7 @@ __kernel void m00900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
|
||||
MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
|
||||
MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD4_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m01000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,128 +36,91 @@ __kernel void m01000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
w3_t[2] = pw_len * 8 * 2;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
|
||||
@ -209,13 +172,7 @@ __kernel void m01000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
|
||||
MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
|
||||
MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -243,43 +200,20 @@ __kernel void m01000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -296,87 +230,73 @@ __kernel void m01000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
w3_t[2] = pw_len * 8 * 2;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
|
||||
@ -428,13 +348,7 @@ __kernel void m01000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
|
||||
MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
|
||||
MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD4_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,43 +36,20 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -104,87 +81,73 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
w3_t[2] = pw_len * 8 * 2;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
|
||||
@ -314,13 +277,7 @@ __kernel void m01100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
|
||||
MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
|
||||
MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -348,43 +305,20 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -428,87 +362,73 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
w3_t[2] = pw_len * 8 * 2;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
|
||||
@ -636,20 +556,12 @@ __kernel void m01100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23);
|
||||
MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20);
|
||||
|
||||
bool q_cond = allx (search[0] != a);
|
||||
|
||||
if (q_cond) continue;
|
||||
if (MATCHES_NONE_VS (a, search[0])) continue;
|
||||
|
||||
MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21);
|
||||
MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22);
|
||||
MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,92 +36,53 @@ __kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -144,31 +105,31 @@ __kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* SHA256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -238,13 +199,7 @@ __kernel void m01400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
@ -272,43 +227,20 @@ __kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -325,51 +257,35 @@ __kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -392,31 +308,31 @@ __kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* SHA256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -486,13 +402,7 @@ __kernel void m01400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,54 +36,31 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -95,39 +72,25 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -166,14 +129,14 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
@ -198,31 +161,31 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -292,13 +255,7 @@ __kernel void m01410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
@ -326,54 +283,31 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -397,39 +331,25 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -468,14 +388,14 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
@ -500,31 +420,31 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -594,13 +514,7 @@ __kernel void m01410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,54 +36,31 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -95,43 +72,35 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -154,7 +123,7 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
|
||||
|
||||
@ -173,31 +142,31 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -267,13 +236,7 @@ __kernel void m01420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
@ -301,54 +264,31 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -372,43 +312,35 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -431,7 +363,7 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
|
||||
|
||||
@ -450,31 +382,31 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -544,13 +476,7 @@ __kernel void m01420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,54 +36,31 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -95,43 +72,35 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -186,10 +155,10 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t2[4];
|
||||
u32 w1_t2[4];
|
||||
u32 w2_t2[4];
|
||||
u32 w3_t2[4];
|
||||
u32x w0_t2[4];
|
||||
u32x w1_t2[4];
|
||||
u32x w2_t2[4];
|
||||
u32x w3_t2[4];
|
||||
|
||||
make_unicode (w0, w0_t2, w1_t2);
|
||||
make_unicode (w1, w2_t2, w3_t2);
|
||||
@ -217,31 +186,31 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0_t2[0]);
|
||||
u32 w1_t = swap32 (w0_t2[1]);
|
||||
u32 w2_t = swap32 (w0_t2[2]);
|
||||
u32 w3_t = swap32 (w0_t2[3]);
|
||||
u32 w4_t = swap32 (w1_t2[0]);
|
||||
u32 w5_t = swap32 (w1_t2[1]);
|
||||
u32 w6_t = swap32 (w1_t2[2]);
|
||||
u32 w7_t = swap32 (w1_t2[3]);
|
||||
u32 w8_t = swap32 (w2_t2[0]);
|
||||
u32 w9_t = swap32 (w2_t2[1]);
|
||||
u32 wa_t = swap32 (w2_t2[2]);
|
||||
u32 wb_t = swap32 (w2_t2[3]);
|
||||
u32 wc_t = swap32 (w3_t2[0]);
|
||||
u32 wd_t = swap32 (w3_t2[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0_t2[0]);
|
||||
u32x w1_t = swap32 (w0_t2[1]);
|
||||
u32x w2_t = swap32 (w0_t2[2]);
|
||||
u32x w3_t = swap32 (w0_t2[3]);
|
||||
u32x w4_t = swap32 (w1_t2[0]);
|
||||
u32x w5_t = swap32 (w1_t2[1]);
|
||||
u32x w6_t = swap32 (w1_t2[2]);
|
||||
u32x w7_t = swap32 (w1_t2[3]);
|
||||
u32x w8_t = swap32 (w2_t2[0]);
|
||||
u32x w9_t = swap32 (w2_t2[1]);
|
||||
u32x wa_t = swap32 (w2_t2[2]);
|
||||
u32x wb_t = swap32 (w2_t2[3]);
|
||||
u32x wc_t = swap32 (w3_t2[0]);
|
||||
u32x wd_t = swap32 (w3_t2[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -311,13 +280,7 @@ __kernel void m01430_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
@ -345,54 +308,31 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -416,43 +356,35 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -507,10 +439,10 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t2[4];
|
||||
u32 w1_t2[4];
|
||||
u32 w2_t2[4];
|
||||
u32 w3_t2[4];
|
||||
u32x w0_t2[4];
|
||||
u32x w1_t2[4];
|
||||
u32x w2_t2[4];
|
||||
u32x w3_t2[4];
|
||||
|
||||
make_unicode (w0, w0_t2, w1_t2);
|
||||
make_unicode (w1, w2_t2, w3_t2);
|
||||
@ -538,31 +470,31 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0_t2[0]);
|
||||
u32 w1_t = swap32 (w0_t2[1]);
|
||||
u32 w2_t = swap32 (w0_t2[2]);
|
||||
u32 w3_t = swap32 (w0_t2[3]);
|
||||
u32 w4_t = swap32 (w1_t2[0]);
|
||||
u32 w5_t = swap32 (w1_t2[1]);
|
||||
u32 w6_t = swap32 (w1_t2[2]);
|
||||
u32 w7_t = swap32 (w1_t2[3]);
|
||||
u32 w8_t = swap32 (w2_t2[0]);
|
||||
u32 w9_t = swap32 (w2_t2[1]);
|
||||
u32 wa_t = swap32 (w2_t2[2]);
|
||||
u32 wb_t = swap32 (w2_t2[3]);
|
||||
u32 wc_t = swap32 (w3_t2[0]);
|
||||
u32 wd_t = swap32 (w3_t2[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0_t2[0]);
|
||||
u32x w1_t = swap32 (w0_t2[1]);
|
||||
u32x w2_t = swap32 (w0_t2[2]);
|
||||
u32x w3_t = swap32 (w0_t2[3]);
|
||||
u32x w4_t = swap32 (w1_t2[0]);
|
||||
u32x w5_t = swap32 (w1_t2[1]);
|
||||
u32x w6_t = swap32 (w1_t2[2]);
|
||||
u32x w7_t = swap32 (w1_t2[3]);
|
||||
u32x w8_t = swap32 (w2_t2[0]);
|
||||
u32x w9_t = swap32 (w2_t2[1]);
|
||||
u32x wa_t = swap32 (w2_t2[2]);
|
||||
u32x wb_t = swap32 (w2_t2[3]);
|
||||
u32x wc_t = swap32 (w3_t2[0]);
|
||||
u32x wd_t = swap32 (w3_t2[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -632,13 +564,7 @@ __kernel void m01430_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,54 +36,31 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -95,43 +72,35 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -156,10 +125,10 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t2[4];
|
||||
u32 w1_t2[4];
|
||||
u32 w2_t2[4];
|
||||
u32 w3_t2[4];
|
||||
u32x w0_t2[4];
|
||||
u32x w1_t2[4];
|
||||
u32x w2_t2[4];
|
||||
u32x w3_t2[4];
|
||||
|
||||
make_unicode (w0, w0_t2, w1_t2);
|
||||
make_unicode (w1, w2_t2, w3_t2);
|
||||
@ -181,31 +150,31 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0_t2[0]);
|
||||
u32 w1_t = swap32 (w0_t2[1]);
|
||||
u32 w2_t = swap32 (w0_t2[2]);
|
||||
u32 w3_t = swap32 (w0_t2[3]);
|
||||
u32 w4_t = swap32 (w1_t2[0]);
|
||||
u32 w5_t = swap32 (w1_t2[1]);
|
||||
u32 w6_t = swap32 (w1_t2[2]);
|
||||
u32 w7_t = swap32 (w1_t2[3]);
|
||||
u32 w8_t = swap32 (w2_t2[0]);
|
||||
u32 w9_t = swap32 (w2_t2[1]);
|
||||
u32 wa_t = swap32 (w2_t2[2]);
|
||||
u32 wb_t = swap32 (w2_t2[3]);
|
||||
u32 wc_t = swap32 (w3_t2[0]);
|
||||
u32 wd_t = swap32 (w3_t2[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0_t2[0]);
|
||||
u32x w1_t = swap32 (w0_t2[1]);
|
||||
u32x w2_t = swap32 (w0_t2[2]);
|
||||
u32x w3_t = swap32 (w0_t2[3]);
|
||||
u32x w4_t = swap32 (w1_t2[0]);
|
||||
u32x w5_t = swap32 (w1_t2[1]);
|
||||
u32x w6_t = swap32 (w1_t2[2]);
|
||||
u32x w7_t = swap32 (w1_t2[3]);
|
||||
u32x w8_t = swap32 (w2_t2[0]);
|
||||
u32x w9_t = swap32 (w2_t2[1]);
|
||||
u32x wa_t = swap32 (w2_t2[2]);
|
||||
u32x wb_t = swap32 (w2_t2[3]);
|
||||
u32x wc_t = swap32 (w3_t2[0]);
|
||||
u32x wd_t = swap32 (w3_t2[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -275,13 +244,7 @@ __kernel void m01440_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
@ -309,54 +272,31 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -380,43 +320,35 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -441,10 +373,10 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t2[4];
|
||||
u32 w1_t2[4];
|
||||
u32 w2_t2[4];
|
||||
u32 w3_t2[4];
|
||||
u32x w0_t2[4];
|
||||
u32x w1_t2[4];
|
||||
u32x w2_t2[4];
|
||||
u32x w3_t2[4];
|
||||
|
||||
make_unicode (w0, w0_t2, w1_t2);
|
||||
make_unicode (w1, w2_t2, w3_t2);
|
||||
@ -466,31 +398,31 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha256
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0_t2[0]);
|
||||
u32 w1_t = swap32 (w0_t2[1]);
|
||||
u32 w2_t = swap32 (w0_t2[2]);
|
||||
u32 w3_t = swap32 (w0_t2[3]);
|
||||
u32 w4_t = swap32 (w1_t2[0]);
|
||||
u32 w5_t = swap32 (w1_t2[1]);
|
||||
u32 w6_t = swap32 (w1_t2[2]);
|
||||
u32 w7_t = swap32 (w1_t2[3]);
|
||||
u32 w8_t = swap32 (w2_t2[0]);
|
||||
u32 w9_t = swap32 (w2_t2[1]);
|
||||
u32 wa_t = swap32 (w2_t2[2]);
|
||||
u32 wb_t = swap32 (w2_t2[3]);
|
||||
u32 wc_t = swap32 (w3_t2[0]);
|
||||
u32 wd_t = swap32 (w3_t2[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_salt_len * 8;
|
||||
u32x w0_t = swap32 (w0_t2[0]);
|
||||
u32x w1_t = swap32 (w0_t2[1]);
|
||||
u32x w2_t = swap32 (w0_t2[2]);
|
||||
u32x w3_t = swap32 (w0_t2[3]);
|
||||
u32x w4_t = swap32 (w1_t2[0]);
|
||||
u32x w5_t = swap32 (w1_t2[1]);
|
||||
u32x w6_t = swap32 (w1_t2[2]);
|
||||
u32x w7_t = swap32 (w1_t2[3]);
|
||||
u32x w8_t = swap32 (w2_t2[0]);
|
||||
u32x w9_t = swap32 (w2_t2[1]);
|
||||
u32x wa_t = swap32 (w2_t2[2]);
|
||||
u32x wb_t = swap32 (w2_t2[3]);
|
||||
u32x wc_t = swap32 (w3_t2[0]);
|
||||
u32x wd_t = swap32 (w3_t2[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_salt_len * 8;
|
||||
|
||||
u32 a = SHA256M_A;
|
||||
u32 b = SHA256M_B;
|
||||
u32 c = SHA256M_C;
|
||||
u32 d = SHA256M_D;
|
||||
u32 e = SHA256M_E;
|
||||
u32 f = SHA256M_F;
|
||||
u32 g = SHA256M_G;
|
||||
u32 h = SHA256M_H;
|
||||
u32x a = SHA256M_A;
|
||||
u32x b = SHA256M_B;
|
||||
u32x c = SHA256M_C;
|
||||
u32x d = SHA256M_D;
|
||||
u32x e = SHA256M_E;
|
||||
u32x f = SHA256M_F;
|
||||
u32x g = SHA256M_G;
|
||||
u32x h = SHA256M_H;
|
||||
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00);
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01);
|
||||
@ -560,13 +492,7 @@ __kernel void m01440_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 k_sha256[64] =
|
||||
{
|
||||
@ -243,41 +243,20 @@ __kernel void m01450_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -302,67 +281,53 @@ __kernel void m01450_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -373,28 +338,28 @@ __kernel void m01450_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -427,12 +392,7 @@ __kernel void m01450_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[7];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[6];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -460,41 +420,20 @@ __kernel void m01450_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -531,67 +470,53 @@ __kernel void m01450_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -602,28 +527,28 @@ __kernel void m01450_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -656,12 +581,7 @@ __kernel void m01450_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[7];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[6];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 k_sha256[64] =
|
||||
{
|
||||
@ -243,41 +243,20 @@ __kernel void m01460_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -337,67 +316,53 @@ __kernel void m01460_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -427,12 +392,7 @@ __kernel void m01460_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[7];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[6];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -460,41 +420,20 @@ __kernel void m01460_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -566,67 +505,53 @@ __kernel void m01460_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -656,12 +581,7 @@ __kernel void m01460_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[7];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[6];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _DES_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define PERM_OP(a,b,tt,n,m) \
|
||||
{ \
|
||||
@ -520,7 +520,7 @@ __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -565,69 +565,49 @@ __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* main
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
pw_len = (pw_len >= 8) ? 8 : pw_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = 0;
|
||||
wordr0[3] = 0;
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = 0;
|
||||
wordr1[1] = 0;
|
||||
wordr1[2] = 0;
|
||||
wordr1[3] = 0;
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = 0;
|
||||
w0[3] = 0;
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = 0;
|
||||
w1[1] = 0;
|
||||
w1[2] = 0;
|
||||
w1[3] = 0;
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
@ -648,12 +628,10 @@ __kernel void m01500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
_des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
@ -711,7 +689,7 @@ __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -768,69 +746,49 @@ __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* main
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
pw_len = (pw_len >= 8) ? 8 : pw_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = 0;
|
||||
wordr0[3] = 0;
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = 0;
|
||||
wordr1[1] = 0;
|
||||
wordr1[2] = 0;
|
||||
wordr1[3] = 0;
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = 0;
|
||||
w0[3] = 0;
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = 0;
|
||||
w1[1] = 0;
|
||||
w1[2] = 0;
|
||||
w1[3] = 0;
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
@ -851,12 +809,10 @@ __kernel void m01500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
_des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha512[80] =
|
||||
{
|
||||
@ -157,92 +157,53 @@ __kernel void m01700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -265,10 +226,10 @@ __kernel void m01700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* SHA512
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
@ -334,43 +295,20 @@ __kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -387,51 +325,35 @@ __kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
append_0x80_2x4 (wordr0, wordr1, pw_r_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -454,10 +376,10 @@ __kernel void m01700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* SHA512
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha512[80] =
|
||||
{
|
||||
@ -157,54 +157,31 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -216,39 +193,25 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -287,14 +250,14 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
@ -319,10 +282,10 @@ __kernel void m01710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha512
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
@ -388,54 +351,31 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -459,39 +399,25 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -530,14 +456,14 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
@ -562,10 +488,10 @@ __kernel void m01710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha512
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha512[80] =
|
||||
{
|
||||
@ -157,54 +157,31 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -216,43 +193,35 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -275,7 +244,7 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
|
||||
|
||||
@ -294,10 +263,10 @@ __kernel void m01720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha512
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
@ -363,54 +332,31 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -434,43 +380,35 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -493,7 +431,7 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len);
|
||||
|
||||
@ -512,10 +450,10 @@ __kernel void m01720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha512
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha512[80] =
|
||||
{
|
||||
@ -157,54 +157,31 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -216,43 +193,35 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -307,10 +276,10 @@ __kernel void m01730_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -402,54 +371,31 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -473,43 +419,35 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -564,10 +502,10 @@ __kernel void m01730_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha512[80] =
|
||||
{
|
||||
@ -157,54 +157,31 @@ __kernel void m01740_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -216,43 +193,35 @@ __kernel void m01740_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -277,10 +246,10 @@ __kernel void m01740_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -366,54 +335,31 @@ __kernel void m01740_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -437,43 +383,35 @@ __kernel void m01740_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -498,10 +436,10 @@ __kernel void m01740_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha512[80] =
|
||||
{
|
||||
@ -274,41 +274,20 @@ __kernel void m01750_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -333,67 +312,53 @@ __kernel void m01750_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -404,28 +369,28 @@ __kernel void m01750_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -492,41 +457,20 @@ __kernel void m01750_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -563,67 +507,53 @@ __kernel void m01750_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -634,28 +564,28 @@ __kernel void m01750_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha512[80] =
|
||||
{
|
||||
@ -274,41 +274,20 @@ __kernel void m01760_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -368,67 +347,53 @@ __kernel void m01760_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -492,41 +457,20 @@ __kernel void m01760_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -598,67 +542,53 @@ __kernel void m01760_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,106 +36,71 @@ __kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -147,10 +112,10 @@ __kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[0] = 0x80;
|
||||
w3[2] = 16 * 8;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -224,13 +189,7 @@ __kernel void m02400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
d &= 0x00ffffff;
|
||||
c &= 0x00ffffff;
|
||||
b &= 0x00ffffff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -258,41 +217,20 @@ __kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -309,67 +247,53 @@ __kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -381,10 +305,10 @@ __kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[0] = 0x80;
|
||||
w3[2] = 16 * 8;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -463,13 +387,7 @@ __kernel void m02400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
d &= 0x00ffffff;
|
||||
c &= 0x00ffffff;
|
||||
b &= 0x00ffffff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,41 +36,20 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -88,39 +67,25 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -159,32 +124,32 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2] | s0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3] | s0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -196,10 +161,10 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[0] = 0x80;
|
||||
w3[2] = 16 * 8;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -273,13 +238,7 @@ __kernel void m02410_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
d &= 0x00ffffff;
|
||||
c &= 0x00ffffff;
|
||||
b &= 0x00ffffff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -307,41 +266,20 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -371,39 +309,25 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -442,32 +366,32 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
s3[2] = 0;
|
||||
s3[3] = 0;
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len);
|
||||
switch_buffer_by_offset_le_S (s0, s1, s2, s3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0] | s0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1] | s0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2] | s0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3] | s0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -479,10 +403,10 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1[0] = 0x80;
|
||||
w3[2] = 16 * 8;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -561,13 +485,7 @@ __kernel void m02410_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
d &= 0x00ffffff;
|
||||
c &= 0x00ffffff;
|
||||
b &= 0x00ffffff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -36,43 +36,20 @@ __kernel void m02610_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -113,77 +90,63 @@ __kernel void m02610_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -357,12 +320,7 @@ __kernel void m02610_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -388,43 +346,20 @@ __kernel void m02610_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -477,77 +412,63 @@ __kernel void m02610_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -720,13 +641,7 @@ __kernel void m02610_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -36,43 +36,20 @@ __kernel void m02710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -115,77 +92,63 @@ __kernel void m02710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -441,13 +404,7 @@ __kernel void m02710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -473,43 +430,20 @@ __kernel void m02710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -564,77 +498,63 @@ __kernel void m02710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -893,13 +813,7 @@ __kernel void m02710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -36,43 +36,20 @@ __kernel void m02810_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -114,77 +91,63 @@ __kernel void m02810_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -440,13 +403,7 @@ __kernel void m02810_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -472,43 +429,20 @@ __kernel void m02810_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -562,77 +496,63 @@ __kernel void m02810_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -891,13 +811,7 @@ __kernel void m02810_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _DES_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define PERM_OP(a,b,tt,n,m) \
|
||||
{ \
|
||||
@ -517,7 +517,7 @@ __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -556,69 +556,55 @@ __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* main
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
pw_len = (pw_len >= 7) ? 7 : pw_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = 0;
|
||||
wordr0[3] = 0;
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = 0;
|
||||
wordr1[1] = 0;
|
||||
wordr1[2] = 0;
|
||||
wordr1[3] = 0;
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = 0;
|
||||
w0[3] = 0;
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = 0;
|
||||
w1[1] = 0;
|
||||
w1[2] = 0;
|
||||
w1[3] = 0;
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
@ -646,12 +632,10 @@ __kernel void m03000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
@ -709,7 +693,7 @@ __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -760,69 +744,49 @@ __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* main
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
pw_len = (pw_len >= 7) ? 7 : pw_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = 0;
|
||||
wordr0[3] = 0;
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = 0;
|
||||
wordr1[1] = 0;
|
||||
wordr1[2] = 0;
|
||||
wordr1[3] = 0;
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = 0;
|
||||
w0[3] = 0;
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = 0;
|
||||
w1[1] = 0;
|
||||
w1[2] = 0;
|
||||
w1[3] = 0;
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
@ -850,12 +814,10 @@ __kernel void m03000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _DES_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define PERM_OP(a,b,tt,n,m) \
|
||||
{ \
|
||||
@ -558,7 +558,7 @@ __kernel void m03100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -566,14 +566,12 @@ __kernel void m03100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -585,69 +583,55 @@ __kernel void m03100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
const u32 salt_word_len = (salt_len + pw_len) * 2;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -658,10 +642,10 @@ __kernel void m03100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
@ -801,12 +785,10 @@ __kernel void m03100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* cmp
|
||||
*/
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
@ -864,54 +846,31 @@ __kernel void m03100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
|
||||
u32 salt_buf0[4];
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
|
||||
u32 salt_buf1[4];
|
||||
|
||||
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4];
|
||||
salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5];
|
||||
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6];
|
||||
@ -935,69 +894,55 @@ __kernel void m03100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
const u32 salt_word_len = (salt_len + pw_len) * 2;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -1008,10 +953,10 @@ __kernel void m03100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
@ -1151,12 +1096,10 @@ __kernel void m03100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* cmp
|
||||
*/
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -55,43 +55,20 @@ __kernel void m03710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -132,77 +109,63 @@ __kernel void m03710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -277,10 +240,10 @@ __kernel void m03710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
c += MD5M_C;
|
||||
d += MD5M_D;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
|
||||
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
|
||||
@ -410,13 +373,7 @@ __kernel void m03710_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -461,43 +418,20 @@ __kernel void m03710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -550,77 +484,63 @@ __kernel void m03710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -695,10 +615,10 @@ __kernel void m03710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
c += MD5M_C;
|
||||
d += MD5M_D;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0
|
||||
| uint_to_hex_lower8 ((a >> 8) & 255) << 16;
|
||||
@ -828,13 +748,7 @@ __kernel void m03710_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m03800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -34,41 +34,20 @@ __kernel void m03800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -107,67 +86,53 @@ __kernel void m03800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -178,28 +143,28 @@ __kernel void m03800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
w0_t[2] = w0[2];
|
||||
w0_t[3] = w0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = w1[0];
|
||||
w1_t[1] = w1[1];
|
||||
w1_t[2] = w1[2];
|
||||
w1_t[3] = w1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = w2[0];
|
||||
w2_t[1] = w2[1];
|
||||
w2_t[2] = w2[2];
|
||||
w2_t[3] = w2[3];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = w3[0];
|
||||
w3_t[1] = w3[1];
|
||||
@ -286,10 +251,10 @@ __kernel void m03800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -359,13 +324,7 @@ __kernel void m03800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -391,41 +350,20 @@ __kernel void m03800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -476,67 +414,53 @@ __kernel void m03800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -547,28 +471,28 @@ __kernel void m03800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
w0_t[2] = w0[2];
|
||||
w0_t[3] = w0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = w1[0];
|
||||
w1_t[1] = w1[1];
|
||||
w1_t[2] = w1[2];
|
||||
w1_t[3] = w1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = w2[0];
|
||||
w2_t[1] = w2[1];
|
||||
w2_t[2] = w2[2];
|
||||
w2_t[3] = w2[3];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = w3[0];
|
||||
w3_t[1] = w3[1];
|
||||
@ -655,10 +579,10 @@ __kernel void m03800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -727,13 +651,7 @@ __kernel void m03800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -55,43 +55,20 @@ __kernel void m04310_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -113,77 +90,63 @@ __kernel void m04310_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -356,13 +319,7 @@ __kernel void m04310_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -407,43 +364,20 @@ __kernel void m04310_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -477,77 +411,63 @@ __kernel void m04310_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -720,13 +640,7 @@ __kernel void m04310_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -55,80 +55,43 @@ __kernel void m04400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -137,28 +100,28 @@ __kernel void m04400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -169,28 +132,28 @@ __kernel void m04400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -398,13 +361,7 @@ __kernel void m04400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -449,43 +406,20 @@ __kernel void m04400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -502,39 +436,25 @@ __kernel void m04400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -543,28 +463,28 @@ __kernel void m04400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -575,28 +495,28 @@ __kernel void m04400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -804,13 +724,7 @@ __kernel void m04400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8_le(i) l_bin2asc[(i)]
|
||||
|
||||
@ -55,80 +55,43 @@ __kernel void m04500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -137,28 +100,28 @@ __kernel void m04500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -169,28 +132,28 @@ __kernel void m04500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -427,13 +390,7 @@ __kernel void m04500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -478,43 +435,20 @@ __kernel void m04500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -531,45 +465,31 @@ __kernel void m04500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -578,28 +498,28 @@ __kernel void m04500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -610,28 +530,28 @@ __kernel void m04500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -865,19 +785,13 @@ __kernel void m04500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
|
||||
wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
|
||||
wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -17,9 +19,7 @@
|
||||
#undef _MD5_
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8_le(i) l_bin2asc[(i)]
|
||||
|
||||
@ -56,80 +56,43 @@ __kernel void m04700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -138,28 +101,28 @@ __kernel void m04700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -170,10 +133,10 @@ __kernel void m04700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -252,31 +215,31 @@ __kernel void m04700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
|
||||
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
|
||||
u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
|
||||
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
|
||||
u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
|
||||
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
|
||||
u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
|
||||
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
|
||||
u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
|
||||
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
|
||||
u32 w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
|
||||
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
|
||||
u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
|
||||
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
|
||||
u32 w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
|
||||
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
|
||||
|
||||
u32 w8_t = 0x80000000;
|
||||
u32 w9_t = 0;
|
||||
u32 wa_t = 0;
|
||||
u32 wb_t = 0;
|
||||
u32 wc_t = 0;
|
||||
u32 wd_t = 0;
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = 32 * 8;
|
||||
u32x w8_t = 0x80000000;
|
||||
u32x w9_t = 0;
|
||||
u32x wa_t = 0;
|
||||
u32x wb_t = 0;
|
||||
u32x wc_t = 0;
|
||||
u32x wd_t = 0;
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = 32 * 8;
|
||||
|
||||
u32 e;
|
||||
|
||||
@ -381,13 +344,7 @@ __kernel void m04700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -432,43 +389,20 @@ __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -485,45 +419,31 @@ __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -532,28 +452,28 @@ __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -564,10 +484,10 @@ __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -646,31 +566,31 @@ __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
|
||||
u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((a >> 0) & 255) << 16;
|
||||
u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
|
||||
u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((a >> 16) & 255) << 16;
|
||||
u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
|
||||
u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((b >> 0) & 255) << 16;
|
||||
u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
|
||||
u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((b >> 16) & 255) << 16;
|
||||
u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
|
||||
u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((c >> 0) & 255) << 16;
|
||||
u32 w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
|
||||
u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((c >> 16) & 255) << 16;
|
||||
u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
|
||||
u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((d >> 0) & 255) << 16;
|
||||
u32 w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
|
||||
u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8_le ((d >> 16) & 255) << 16;
|
||||
|
||||
u32 w8_t = 0x80000000;
|
||||
u32 w9_t = 0;
|
||||
u32 wa_t = 0;
|
||||
u32 wb_t = 0;
|
||||
u32 wc_t = 0;
|
||||
u32 wd_t = 0;
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = 32 * 8;
|
||||
u32x w8_t = 0x80000000;
|
||||
u32x w9_t = 0;
|
||||
u32x wa_t = 0;
|
||||
u32x wb_t = 0;
|
||||
u32x wc_t = 0;
|
||||
u32x wd_t = 0;
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = 32 * 8;
|
||||
|
||||
u32 e;
|
||||
|
||||
@ -772,19 +692,13 @@ __kernel void m04700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t);
|
||||
wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t);
|
||||
wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t);
|
||||
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t);
|
||||
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m04800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,41 +36,20 @@ __kernel void m04800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -89,67 +68,53 @@ __kernel void m04800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -198,7 +163,7 @@ __kernel void m04800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, 1 + pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
w0[0] |= s0[0];
|
||||
w0[1] |= s0[1];
|
||||
@ -221,10 +186,10 @@ __kernel void m04800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -293,13 +258,7 @@ __kernel void m04800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -327,41 +286,20 @@ __kernel void m04800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -392,67 +330,53 @@ __kernel void m04800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -501,7 +425,7 @@ __kernel void m04800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
switch_buffer_by_offset_le (s0, s1, s2, s3, 1 + pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
w0[0] |= s0[0];
|
||||
w0[1] |= s0[1];
|
||||
@ -524,10 +448,10 @@ __kernel void m04800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -594,20 +518,12 @@ __kernel void m04800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33);
|
||||
MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30);
|
||||
|
||||
bool q_cond = allx (search[0] != a);
|
||||
|
||||
if (q_cond) continue;
|
||||
if (MATCHES_NONE_VS (a, search[0])) continue;
|
||||
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m04900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,41 +36,20 @@ __kernel void m04900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -109,67 +88,53 @@ __kernel void m04900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = wordl0[0] | wordr0[0];
|
||||
w0_t[1] = wordl0[1] | wordr0[1];
|
||||
w0_t[2] = wordl0[2] | wordr0[2];
|
||||
w0_t[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = wordl1[0] | wordr1[0];
|
||||
w1_t[1] = wordl1[1] | wordr1[1];
|
||||
w1_t[2] = wordl1[2] | wordr1[2];
|
||||
w1_t[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = wordl2[0] | wordr2[0];
|
||||
w2_t[1] = wordl2[1] | wordr2[1];
|
||||
w2_t[2] = wordl2[2] | wordr2[2];
|
||||
w2_t[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = wordl3[0] | wordr3[0];
|
||||
w3_t[1] = wordl3[1] | wordr3[1];
|
||||
@ -254,32 +219,32 @@ __kernel void m04900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
|
||||
|
||||
u32 w0 = swap32 (w0_t[0]);
|
||||
u32 w1 = swap32 (w0_t[1]);
|
||||
u32 w2 = swap32 (w0_t[2]);
|
||||
u32 w3 = swap32 (w0_t[3]);
|
||||
u32 w4 = swap32 (w1_t[0]);
|
||||
u32 w5 = swap32 (w1_t[1]);
|
||||
u32 w6 = swap32 (w1_t[2]);
|
||||
u32 w7 = swap32 (w1_t[3]);
|
||||
u32 w8 = swap32 (w2_t[0]);
|
||||
u32 w9 = swap32 (w2_t[1]);
|
||||
u32 wa = swap32 (w2_t[2]);
|
||||
u32 wb = swap32 (w2_t[3]);
|
||||
u32 wc = swap32 (w3_t[0]);
|
||||
u32 wd = swap32 (w3_t[1]);
|
||||
u32 we = 0;
|
||||
u32 wf = pw_salt_len * 8;
|
||||
u32x w0 = swap32 (w0_t[0]);
|
||||
u32x w1 = swap32 (w0_t[1]);
|
||||
u32x w2 = swap32 (w0_t[2]);
|
||||
u32x w3 = swap32 (w0_t[3]);
|
||||
u32x w4 = swap32 (w1_t[0]);
|
||||
u32x w5 = swap32 (w1_t[1]);
|
||||
u32x w6 = swap32 (w1_t[2]);
|
||||
u32x w7 = swap32 (w1_t[3]);
|
||||
u32x w8 = swap32 (w2_t[0]);
|
||||
u32x w9 = swap32 (w2_t[1]);
|
||||
u32x wa = swap32 (w2_t[2]);
|
||||
u32x wb = swap32 (w2_t[3]);
|
||||
u32x wc = swap32 (w3_t[0]);
|
||||
u32x wd = swap32 (w3_t[1]);
|
||||
u32x we = 0;
|
||||
u32x wf = pw_salt_len * 8;
|
||||
|
||||
/**
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -376,13 +341,7 @@ __kernel void m04900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd);
|
||||
we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we);
|
||||
wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -410,41 +369,20 @@ __kernel void m04900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -495,73 +433,59 @@ __kernel void m04900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = wordl0[0] | wordr0[0];
|
||||
w0_t[1] = wordl0[1] | wordr0[1];
|
||||
w0_t[2] = wordl0[2] | wordr0[2];
|
||||
w0_t[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = wordl1[0] | wordr1[0];
|
||||
w1_t[1] = wordl1[1] | wordr1[1];
|
||||
w1_t[2] = wordl1[2] | wordr1[2];
|
||||
w1_t[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = wordl2[0] | wordr2[0];
|
||||
w2_t[1] = wordl2[1] | wordr2[1];
|
||||
w2_t[2] = wordl2[2] | wordr2[2];
|
||||
w2_t[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = wordl3[0] | wordr3[0];
|
||||
w3_t[1] = wordl3[1] | wordr3[1];
|
||||
@ -646,32 +570,32 @@ __kernel void m04900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_4x4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len);
|
||||
|
||||
u32 w0 = swap32 (w0_t[0]);
|
||||
u32 w1 = swap32 (w0_t[1]);
|
||||
u32 w2 = swap32 (w0_t[2]);
|
||||
u32 w3 = swap32 (w0_t[3]);
|
||||
u32 w4 = swap32 (w1_t[0]);
|
||||
u32 w5 = swap32 (w1_t[1]);
|
||||
u32 w6 = swap32 (w1_t[2]);
|
||||
u32 w7 = swap32 (w1_t[3]);
|
||||
u32 w8 = swap32 (w2_t[0]);
|
||||
u32 w9 = swap32 (w2_t[1]);
|
||||
u32 wa = swap32 (w2_t[2]);
|
||||
u32 wb = swap32 (w2_t[3]);
|
||||
u32 wc = swap32 (w3_t[0]);
|
||||
u32 wd = swap32 (w3_t[1]);
|
||||
u32 we = 0;
|
||||
u32 wf = pw_salt_len * 8;
|
||||
u32x w0 = swap32 (w0_t[0]);
|
||||
u32x w1 = swap32 (w0_t[1]);
|
||||
u32x w2 = swap32 (w0_t[2]);
|
||||
u32x w3 = swap32 (w0_t[3]);
|
||||
u32x w4 = swap32 (w1_t[0]);
|
||||
u32x w5 = swap32 (w1_t[1]);
|
||||
u32x w6 = swap32 (w1_t[2]);
|
||||
u32x w7 = swap32 (w1_t[3]);
|
||||
u32x w8 = swap32 (w2_t[0]);
|
||||
u32x w9 = swap32 (w2_t[1]);
|
||||
u32x wa = swap32 (w2_t[2]);
|
||||
u32x wb = swap32 (w2_t[3]);
|
||||
u32x wc = swap32 (w3_t[0]);
|
||||
u32x wd = swap32 (w3_t[1]);
|
||||
u32x we = 0;
|
||||
u32x wf = pw_salt_len * 8;
|
||||
|
||||
/**
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -765,19 +689,13 @@ __kernel void m04900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa);
|
||||
wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc);
|
||||
wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd);
|
||||
we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we);
|
||||
wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _KECCAK_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 keccakf_rndc[24] =
|
||||
{
|
||||
@ -102,43 +102,20 @@ __kernel void m05000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x01_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* 0x80 keccak, very special
|
||||
*/
|
||||
@ -153,39 +130,25 @@ __kernel void m05000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -194,28 +157,28 @@ __kernel void m05000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -363,43 +326,20 @@ __kernel void m05000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x01_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -426,39 +366,25 @@ __kernel void m05000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -467,28 +393,28 @@ __kernel void m05000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5H_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m05100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,118 +36,81 @@ __kernel void m05100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -319,86 +282,72 @@ __kernel void m05100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
append_0x80_2x4_S (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
|
||||
{
|
||||
@ -248,106 +248,71 @@ __kernel void m05300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -358,28 +323,28 @@ __kernel void m05300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
w0_t[2] = w0[2];
|
||||
w0_t[3] = w0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = w1[0];
|
||||
w1_t[1] = w1[1];
|
||||
w1_t[2] = w1[2];
|
||||
w1_t[3] = w1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = w2[0];
|
||||
w2_t[1] = w2[1];
|
||||
w2_t[2] = w2[2];
|
||||
w2_t[3] = w2[3];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = w3[0];
|
||||
w3_t[1] = w3[1];
|
||||
@ -475,12 +440,7 @@ __kernel void m05300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[0];
|
||||
const u32 r1 = digest[3];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[0], digest[3], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -537,41 +497,20 @@ __kernel void m05300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -588,67 +527,53 @@ __kernel void m05300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -659,28 +584,28 @@ __kernel void m05300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
w0_t[2] = w0[2];
|
||||
w0_t[3] = w0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = w1[0];
|
||||
w1_t[1] = w1[1];
|
||||
w1_t[2] = w1[2];
|
||||
w1_t[3] = w1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -776,12 +701,7 @@ __kernel void m05300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[0];
|
||||
const u32 r1 = digest[3];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[0], digest[3], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
|
||||
{
|
||||
@ -282,106 +282,71 @@ __kernel void m05400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -392,28 +357,28 @@ __kernel void m05400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -509,12 +474,7 @@ __kernel void m05400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -571,41 +531,20 @@ __kernel void m05400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -622,67 +561,53 @@ __kernel void m05400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -693,28 +618,28 @@ __kernel void m05400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -810,12 +735,7 @@ __kernel void m05400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _MD4_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define PERM_OP(a,b,tt,n,m) \
|
||||
{ \
|
||||
@ -528,43 +528,20 @@ __kernel void m05500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -582,87 +559,73 @@ __kernel void m05500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
w3_t[2] = pw_len * 8 * 2;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
|
||||
@ -822,43 +785,20 @@ __kernel void m05500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -888,87 +828,73 @@ __kernel void m05500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
w3_t[2] = pw_len * 8 * 2;
|
||||
|
||||
u32 a = MD4M_A;
|
||||
u32 b = MD4M_B;
|
||||
u32 c = MD4M_C;
|
||||
u32 d = MD4M_D;
|
||||
u32x a = MD4M_A;
|
||||
u32x b = MD4M_B;
|
||||
u32x c = MD4M_C;
|
||||
u32x d = MD4M_D;
|
||||
|
||||
MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00);
|
||||
MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _NETNTLMV2_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
|
||||
{
|
||||
@ -326,118 +326,81 @@ __kernel void m05600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -589,12 +552,7 @@ __kernel void m05600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[0];
|
||||
const u32 r1 = digest[3];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[0], digest[3], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -648,43 +606,20 @@ __kernel void m05600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -701,77 +636,63 @@ __kernel void m05600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -923,12 +844,7 @@ __kernel void m05600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[0];
|
||||
const u32 r1 = digest[3];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[0], digest[3], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _RIPEMD160_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void ripemd160_transform (const u32 w[16], u32 dgst[5])
|
||||
{
|
||||
@ -233,115 +233,78 @@ __kernel void m06000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 wl[16];
|
||||
u32x wl[16];
|
||||
|
||||
wl[ 0] = w0[0];
|
||||
wl[ 1] = w0[1];
|
||||
@ -403,43 +366,20 @@ __kernel void m06000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -456,74 +396,60 @@ __kernel void m06000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = pw_len * 8;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 wl[16];
|
||||
u32x wl[16];
|
||||
|
||||
wl[ 0] = w0[0];
|
||||
wl[ 1] = w0[1];
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _WHIRLPOOL_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define R 10
|
||||
|
||||
@ -1381,115 +1381,78 @@ __kernel void m06100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 wl[16];
|
||||
u32x wl[16];
|
||||
|
||||
wl[ 0] = swap32 (w0[0]);
|
||||
wl[ 1] = swap32 (w0[1]);
|
||||
@ -1592,43 +1555,20 @@ __kernel void m06100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -1645,74 +1585,60 @@ __kernel void m06100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = 0;
|
||||
w3[3] = 0;
|
||||
|
||||
u32 wl[16];
|
||||
u32x wl[16];
|
||||
|
||||
wl[ 0] = swap32 (w0[0]);
|
||||
wl[ 1] = swap32 (w0[1]);
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _GOST_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 c_tables[4][256] =
|
||||
{
|
||||
@ -727,106 +727,71 @@ __kernel void m06900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -1029,41 +994,20 @@ __kernel void m06900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -1080,67 +1024,53 @@ __kernel void m06900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
|
||||
{
|
||||
@ -253,41 +253,20 @@ __kernel void m07300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -298,67 +277,53 @@ __kernel void m07300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -369,28 +334,28 @@ __kernel void m07300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -450,12 +415,7 @@ __kernel void m07300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -483,41 +443,20 @@ __kernel void m07300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -540,67 +479,53 @@ __kernel void m07300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -611,28 +536,28 @@ __kernel void m07300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* pads
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -692,12 +617,7 @@ __kernel void m07300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _KRB5PA_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -559,41 +561,20 @@ __kernel void m07500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -620,67 +601,53 @@ __kernel void m07500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -736,41 +703,20 @@ __kernel void m07500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -797,67 +743,53 @@ __kernel void m07500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -55,43 +55,20 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -132,39 +109,25 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -173,28 +136,28 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -205,28 +168,28 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -334,7 +297,7 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* Prepend salt
|
||||
*/
|
||||
|
||||
u32 w0t[4];
|
||||
u32x w0t[4];
|
||||
|
||||
w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
|
||||
@ -345,7 +308,7 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
|
||||
|
||||
u32 w1t[4];
|
||||
u32x w1t[4];
|
||||
|
||||
w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
|
||||
@ -356,7 +319,7 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
|
||||
|
||||
u32 w2t[2];
|
||||
u32x w2t[2];
|
||||
|
||||
w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
|
||||
@ -637,13 +600,7 @@ __kernel void m07600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
e += r_e;
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -688,43 +645,20 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -777,45 +711,31 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -824,28 +744,28 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -856,28 +776,28 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -985,7 +905,7 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* Prepend salt
|
||||
*/
|
||||
|
||||
u32 w0t[4];
|
||||
u32x w0t[4];
|
||||
|
||||
w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8 ((a >> 16) & 255) << 16;
|
||||
@ -996,7 +916,7 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8 ((b >> 0) & 255) << 16;
|
||||
|
||||
u32 w1t[4];
|
||||
u32x w1t[4];
|
||||
|
||||
w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8 ((c >> 16) & 255) << 16;
|
||||
@ -1007,7 +927,7 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0
|
||||
| uint_to_hex_lower8 ((d >> 0) & 255) << 16;
|
||||
|
||||
u32 w2t[2];
|
||||
u32x w2t[2];
|
||||
|
||||
w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0
|
||||
| uint_to_hex_lower8 ((e >> 16) & 255) << 16;
|
||||
@ -1288,13 +1208,7 @@ __kernel void m07600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
e += r_e;
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _SHA256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 k_sha256[64] =
|
||||
{
|
||||
@ -289,78 +289,49 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -379,16 +350,16 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
u32 w_t[16];
|
||||
u32x w_t[16];
|
||||
|
||||
w_t[ 0] = swap32 (w0_t[0]);
|
||||
w_t[ 1] = swap32 (w0_t[1]);
|
||||
@ -445,12 +416,7 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
sha256_transform_s (digest, w_s1); // 448 - 512
|
||||
sha256_transform_s (digest, w_s2); // 512 - 576
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[7];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[6];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -525,41 +491,20 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -576,39 +521,31 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -627,16 +564,16 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
|
||||
u32 w_t[16];
|
||||
u32x w_t[16];
|
||||
|
||||
w_t[ 0] = swap32 (w0_t[0]);
|
||||
w_t[ 1] = swap32 (w0_t[1]);
|
||||
@ -693,12 +630,7 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
sha256_transform_s (digest, w_s1); // 448 - 512
|
||||
sha256_transform_s (digest, w_s2); // 512 - 576
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[7];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[6];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[7], digest[2], digest[6]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m08100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,41 +36,20 @@ __kernel void m08100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -86,43 +65,35 @@ __kernel void m08100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -145,12 +116,12 @@ __kernel void m08100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
@ -192,11 +163,11 @@ __kernel void m08100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -293,13 +264,7 @@ __kernel void m08100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -327,41 +292,20 @@ __kernel void m08100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -389,49 +333,41 @@ __kernel void m08100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* reverse
|
||||
*/
|
||||
|
||||
const u32 e_rev = rotl32 (search[1], 2u);
|
||||
const u32 e_rev = rotl32_S (search[1], 2u);
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -454,12 +390,12 @@ __kernel void m08100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* prepend salt
|
||||
*/
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
@ -501,11 +437,11 @@ __kernel void m08100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
//w3_t[2] = swap32 (w3_t[2]);
|
||||
//w3_t[3] = swap32 (w3_t[3]);
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -600,18 +536,12 @@ __kernel void m08100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]);
|
||||
w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]);
|
||||
|
||||
if (allx (e != e_rev)) continue;
|
||||
if (MATCHES_NONE_VS (e, e_rev)) continue;
|
||||
|
||||
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]);
|
||||
w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]);
|
||||
w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
|
||||
{
|
||||
@ -164,41 +164,20 @@ __kernel void m08300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -241,43 +220,35 @@ __kernel void m08300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -296,28 +267,28 @@ __kernel void m08300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
w0_t[2] = w0[2];
|
||||
w0_t[3] = w0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = w1[0];
|
||||
w1_t[1] = w1[1];
|
||||
w1_t[2] = w1[2];
|
||||
w1_t[3] = w1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = w2[0];
|
||||
w2_t[1] = w2[1];
|
||||
w2_t[2] = w2[2];
|
||||
w2_t[3] = w2[3];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = w3[0];
|
||||
w3_t[1] = w3[1];
|
||||
@ -396,28 +367,28 @@ __kernel void m08300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t2[4];
|
||||
u32x w0_t2[4];
|
||||
|
||||
w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
|
||||
w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
|
||||
w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
|
||||
w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
|
||||
|
||||
u32 w1_t2[4];
|
||||
u32x w1_t2[4];
|
||||
|
||||
w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
|
||||
w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
|
||||
w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
|
||||
w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
|
||||
|
||||
u32 w2_t2[4];
|
||||
u32x w2_t2[4];
|
||||
|
||||
w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
|
||||
w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
|
||||
w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
|
||||
w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
|
||||
|
||||
u32 w3_t2[4];
|
||||
u32x w3_t2[4];
|
||||
|
||||
w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
|
||||
w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
|
||||
@ -438,28 +409,28 @@ __kernel void m08300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
for (u32 i = 0; i < salt_iter; i++)
|
||||
{
|
||||
u32 w0_t3[4];
|
||||
u32x w0_t3[4];
|
||||
|
||||
w0_t3[0] = digest[0];
|
||||
w0_t3[1] = digest[1];
|
||||
w0_t3[2] = digest[2];
|
||||
w0_t3[3] = digest[3];
|
||||
|
||||
u32 w1_t3[4];
|
||||
u32x w1_t3[4];
|
||||
|
||||
w1_t3[0] = digest[4];
|
||||
w1_t3[1] = swap32 (salt_buf0[0]);
|
||||
w1_t3[2] = swap32 (salt_buf0[1]);
|
||||
w1_t3[3] = swap32 (salt_buf0[2]);
|
||||
|
||||
u32 w2_t3[4];
|
||||
u32x w2_t3[4];
|
||||
|
||||
w2_t3[0] = swap32 (salt_buf0[3]);
|
||||
w2_t3[1] = swap32 (salt_buf1[0]);
|
||||
w2_t3[2] = swap32 (salt_buf1[1]);
|
||||
w2_t3[3] = swap32 (salt_buf1[2]);
|
||||
|
||||
u32 w3_t3[4];
|
||||
u32x w3_t3[4];
|
||||
|
||||
w3_t3[0] = swap32 (salt_buf1[3]);
|
||||
w3_t3[1] = 0;
|
||||
@ -475,12 +446,7 @@ __kernel void m08300_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest);
|
||||
}
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -508,41 +474,20 @@ __kernel void m08300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -597,43 +542,35 @@ __kernel void m08300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -652,28 +589,28 @@ __kernel void m08300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = w0[0];
|
||||
w0_t[1] = w0[1];
|
||||
w0_t[2] = w0[2];
|
||||
w0_t[3] = w0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = w1[0];
|
||||
w1_t[1] = w1[1];
|
||||
w1_t[2] = w1[2];
|
||||
w1_t[3] = w1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = w2[0];
|
||||
w2_t[1] = w2[1];
|
||||
w2_t[2] = w2[2];
|
||||
w2_t[3] = w2[3];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = w3[0];
|
||||
w3_t[1] = w3[1];
|
||||
@ -752,28 +689,28 @@ __kernel void m08300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t2[4];
|
||||
u32x w0_t2[4];
|
||||
|
||||
w0_t2[0] = swap32 (w0_t[0] | d0[0] | s0[0]);
|
||||
w0_t2[1] = swap32 (w0_t[1] | d0[1] | s0[1]);
|
||||
w0_t2[2] = swap32 (w0_t[2] | d0[2] | s0[2]);
|
||||
w0_t2[3] = swap32 (w0_t[3] | d0[3] | s0[3]);
|
||||
|
||||
u32 w1_t2[4];
|
||||
u32x w1_t2[4];
|
||||
|
||||
w1_t2[0] = swap32 (w1_t[0] | d1[0] | s1[0]);
|
||||
w1_t2[1] = swap32 (w1_t[1] | d1[1] | s1[1]);
|
||||
w1_t2[2] = swap32 (w1_t[2] | d1[2] | s1[2]);
|
||||
w1_t2[3] = swap32 (w1_t[3] | d1[3] | s1[3]);
|
||||
|
||||
u32 w2_t2[4];
|
||||
u32x w2_t2[4];
|
||||
|
||||
w2_t2[0] = swap32 (w2_t[0] | d2[0] | s2[0]);
|
||||
w2_t2[1] = swap32 (w2_t[1] | d2[1] | s2[1]);
|
||||
w2_t2[2] = swap32 (w2_t[2] | d2[2] | s2[2]);
|
||||
w2_t2[3] = swap32 (w2_t[3] | d2[3] | s2[3]);
|
||||
|
||||
u32 w3_t2[4];
|
||||
u32x w3_t2[4];
|
||||
|
||||
w3_t2[0] = swap32 (w3_t[0] | d3[0] | s3[0]);
|
||||
w3_t2[1] = swap32 (w3_t[1] | d3[1] | s3[1]);
|
||||
@ -794,28 +731,28 @@ __kernel void m08300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
for (u32 i = 0; i < salt_iter; i++)
|
||||
{
|
||||
u32 w0_t3[4];
|
||||
u32x w0_t3[4];
|
||||
|
||||
w0_t3[0] = digest[0];
|
||||
w0_t3[1] = digest[1];
|
||||
w0_t3[2] = digest[2];
|
||||
w0_t3[3] = digest[3];
|
||||
|
||||
u32 w1_t3[4];
|
||||
u32x w1_t3[4];
|
||||
|
||||
w1_t3[0] = digest[4];
|
||||
w1_t3[1] = swap32 (salt_buf0[0]);
|
||||
w1_t3[2] = swap32 (salt_buf0[1]);
|
||||
w1_t3[3] = swap32 (salt_buf0[2]);
|
||||
|
||||
u32 w2_t3[4];
|
||||
u32x w2_t3[4];
|
||||
|
||||
w2_t3[0] = swap32 (salt_buf0[3]);
|
||||
w2_t3[1] = swap32 (salt_buf1[0]);
|
||||
w2_t3[2] = swap32 (salt_buf1[1]);
|
||||
w2_t3[3] = swap32 (salt_buf1[2]);
|
||||
|
||||
u32 w3_t3[4];
|
||||
u32x w3_t3[4];
|
||||
|
||||
w3_t3[0] = swap32 (salt_buf1[3]);
|
||||
w3_t3[1] = 0;
|
||||
@ -831,12 +768,7 @@ __kernel void m08300_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest);
|
||||
}
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8_le(i) l_bin2asc[(i)]
|
||||
|
||||
@ -183,43 +183,20 @@ __kernel void m08400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -251,39 +228,25 @@ __kernel void m08400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -292,56 +255,56 @@ __kernel void m08400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = swap32 (w2[0]);
|
||||
w2_t[1] = swap32 (w2[1]);
|
||||
w2_t[2] = swap32 (w2[2]);
|
||||
w2_t[3] = swap32 (w2[3]);
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = swap32 (w3[0]);
|
||||
w3_t[1] = swap32 (w3[1]);
|
||||
@ -484,12 +447,7 @@ __kernel void m08400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -534,43 +492,20 @@ __kernel void m08400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -614,39 +549,25 @@ __kernel void m08400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -655,56 +576,56 @@ __kernel void m08400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
w0_t[2] = swap32 (w0[2]);
|
||||
w0_t[3] = swap32 (w0[3]);
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = swap32 (w1[0]);
|
||||
w1_t[1] = swap32 (w1[1]);
|
||||
w1_t[2] = swap32 (w1[2]);
|
||||
w1_t[3] = swap32 (w1[3]);
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = swap32 (w2[0]);
|
||||
w2_t[1] = swap32 (w2[1]);
|
||||
w2_t[2] = swap32 (w2[2]);
|
||||
w2_t[3] = swap32 (w2[3]);
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = swap32 (w3[0]);
|
||||
w3_t[1] = swap32 (w3[1]);
|
||||
@ -847,12 +768,7 @@ __kernel void m08400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
|
||||
|
||||
const u32 r0 = digest[3];
|
||||
const u32 r1 = digest[4];
|
||||
const u32 r2 = digest[2];
|
||||
const u32 r3 = digest[1];
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _DES_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define PERM_OP(a,b,tt,n,m) \
|
||||
{ \
|
||||
@ -583,7 +583,7 @@ __kernel void m08500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -599,69 +599,49 @@ __kernel void m08500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* main
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
pw_len = (pw_len >= 8) ? 8 : pw_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = 0;
|
||||
wordr0[3] = 0;
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = 0;
|
||||
wordr1[1] = 0;
|
||||
wordr1[2] = 0;
|
||||
wordr1[3] = 0;
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = 0;
|
||||
w0[3] = 0;
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = 0;
|
||||
w1[1] = 0;
|
||||
w1[2] = 0;
|
||||
w1[3] = 0;
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
@ -689,12 +669,10 @@ __kernel void m08500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
@ -784,7 +762,7 @@ __kernel void m08500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -812,69 +790,49 @@ __kernel void m08500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* main
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
pw_len = (pw_len >= 8) ? 8 : pw_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = 0;
|
||||
wordr0[3] = 0;
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = 0;
|
||||
wordr1[1] = 0;
|
||||
wordr1[2] = 0;
|
||||
wordr1[3] = 0;
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = 0;
|
||||
w0[3] = 0;
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = 0;
|
||||
w1[1] = 0;
|
||||
w1[2] = 0;
|
||||
w1[3] = 0;
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = 0;
|
||||
w2[1] = 0;
|
||||
w2[2] = 0;
|
||||
w2[3] = 0;
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = 0;
|
||||
w3[1] = 0;
|
||||
@ -902,12 +860,10 @@ __kernel void m08500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
|
||||
|
||||
const u32 r0 = iv[0];
|
||||
const u32 r1 = iv[1];
|
||||
const u32 r2 = 0;
|
||||
const u32 r3 = 0;
|
||||
u32x c = 0;
|
||||
u32x d = 0;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (iv[0], iv[1], c, d);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _LOTUS5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 lotus_magic_table[256] =
|
||||
{
|
||||
@ -261,85 +261,50 @@ __kernel void m08600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -434,41 +399,20 @@ __kernel void m08600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -485,46 +429,32 @@ __kernel void m08600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
u32 pw_len = pw_l_len + pw_r_len;
|
||||
u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -543,7 +473,7 @@ __kernel void m08600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w[14] = wordl3[2] | wordr3[2];
|
||||
w[15] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 state[4];
|
||||
u32x state[4];
|
||||
|
||||
state[0] = 0;
|
||||
state[1] = 0;
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _LOTUS6_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 lotus_magic_table[256] =
|
||||
{
|
||||
@ -292,41 +292,20 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -338,46 +317,32 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -396,7 +361,7 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w[14] = wordl3[2] | wordr3[2];
|
||||
w[15] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 state[4];
|
||||
u32x state[4];
|
||||
|
||||
state[0] = 0;
|
||||
state[1] = 0;
|
||||
@ -469,10 +434,10 @@ __kernel void m08700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
domino_big_md (w, 34, state, s_lotus_magic_table);
|
||||
|
||||
u32 a = state[0] & 0xffffffff;
|
||||
u32 b = state[1] & 0xffffffff;
|
||||
u32 c = state[2] & 0x000000ff;
|
||||
u32 d = state[3] & 0x00000000;
|
||||
u32x a = state[0] & 0xffffffff;
|
||||
u32x b = state[1] & 0xffffffff;
|
||||
u32x c = state[2] & 0x000000ff;
|
||||
u32x d = state[3] & 0x00000000;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
@ -531,41 +496,20 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -589,46 +533,32 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -647,7 +577,7 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w[14] = wordl3[2] | wordr3[2];
|
||||
w[15] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 state[4];
|
||||
u32x state[4];
|
||||
|
||||
state[0] = 0;
|
||||
state[1] = 0;
|
||||
@ -720,10 +650,10 @@ __kernel void m08700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
domino_big_md (w, 34, state, s_lotus_magic_table);
|
||||
|
||||
u32 a = state[0] & 0xffffffff;
|
||||
u32 b = state[1] & 0xffffffff;
|
||||
u32 c = state[2] & 0x000000ff;
|
||||
u32 d = state[3] & 0x00000000;
|
||||
u32x a = state[0] & 0xffffffff;
|
||||
u32x b = state[1] & 0xffffffff;
|
||||
u32x c = state[2] & 0x000000ff;
|
||||
u32x d = state[3] & 0x00000000;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _OLDOFFICE01_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
|
||||
{
|
||||
@ -478,41 +478,20 @@ __kernel void m09720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -528,67 +507,53 @@ __kernel void m09720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -597,10 +562,10 @@ __kernel void m09720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_2x4 (w0, w1, pw_len);
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -630,8 +595,8 @@ __kernel void m09720_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
gen336 (digest_pre, salt_buf, digest);
|
||||
|
||||
u32 a = digest[0];
|
||||
u32 b = digest[1] & 0xff;
|
||||
u32x a = digest[0];
|
||||
u32x b = digest[1] & 0xff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
@ -666,41 +631,20 @@ __kernel void m09720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -728,67 +672,53 @@ __kernel void m09720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -797,10 +727,10 @@ __kernel void m09720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_2x4 (w0, w1, pw_len);
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -830,8 +760,8 @@ __kernel void m09720_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
gen336 (digest_pre, salt_buf, digest);
|
||||
|
||||
u32 a = digest[0];
|
||||
u32 b = digest[1] & 0xff;
|
||||
u32x a = digest[0];
|
||||
u32x b = digest[1] & 0xff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _OLDOFFICE34_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5])
|
||||
{
|
||||
@ -164,41 +164,20 @@ __kernel void m09820_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -216,69 +195,55 @@ __kernel void m09820_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -287,10 +252,10 @@ __kernel void m09820_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_2x4 (w0, w1, pw_len);
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -349,8 +314,8 @@ __kernel void m09820_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
|
||||
|
||||
u32 a = swap32 (digest[0]);
|
||||
u32 b = swap32 (digest[1]) & 0xff;
|
||||
u32x a = swap32 (digest[0]);
|
||||
u32x b = swap32 (digest[1]) & 0xff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
@ -385,41 +350,20 @@ __kernel void m09820_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -449,69 +393,55 @@ __kernel void m09820_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
const u32 pw_salt_len = (pw_len * 2) + salt_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -520,10 +450,10 @@ __kernel void m09820_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_2x4 (w0, w1, pw_len);
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
make_unicode (w0, w0_t, w1_t);
|
||||
make_unicode (w1, w2_t, w3_t);
|
||||
@ -582,8 +512,8 @@ __kernel void m09820_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest);
|
||||
|
||||
u32 a = swap32 (digest[0]);
|
||||
u32 b = swap32 (digest[1]) & 0xff;
|
||||
u32x a = swap32 (digest[0]);
|
||||
u32x b = swap32 (digest[1]) & 0xff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m09900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,116 +36,81 @@ __kernel void m09900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -319,13 +284,7 @@ __kernel void m09900_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -353,41 +312,20 @@ __kernel void m09900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -404,77 +342,63 @@ __kernel void m09900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01);
|
||||
@ -651,13 +575,7 @@ __kernel void m09900_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SIPHASH_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define SIPROUND(v0,v1,v2,v3) \
|
||||
(v0) += (v1); \
|
||||
@ -52,41 +52,20 @@ __kernel void m10100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* base
|
||||
*/
|
||||
@ -105,46 +84,32 @@ __kernel void m10100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -232,41 +197,20 @@ __kernel void m10100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -297,46 +241,32 @@ __kernel void m10100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 padding[8] =
|
||||
{
|
||||
@ -146,43 +146,20 @@ __kernel void m10420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* U_buf
|
||||
*/
|
||||
@ -211,77 +188,63 @@ __kernel void m10420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
// max length supported by pdf11 is 32
|
||||
|
||||
@ -353,8 +316,8 @@ __kernel void m10420_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
|
||||
|
||||
u32 a = digest[0];
|
||||
u32 b = digest[1] & 0xff;
|
||||
u32x a = digest[0];
|
||||
u32x b = digest[1] & 0xff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
@ -389,43 +352,20 @@ __kernel void m10420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -466,77 +406,63 @@ __kernel void m10420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
w3[2] = wordl3[2] | wordr3[2];
|
||||
w3[3] = wordl3[3] | wordr3[3];
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
// max length supported by pdf11 is 32
|
||||
|
||||
@ -608,8 +534,8 @@ __kernel void m10420_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
md5_transform (w0_t, w1_t, w2_t, w3_t, digest);
|
||||
|
||||
u32 a = digest[0];
|
||||
u32 b = digest[1] & 0xff;
|
||||
u32x a = digest[0];
|
||||
u32x b = digest[1] & 0xff;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA384_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u64 k_sha384[80] =
|
||||
{
|
||||
@ -156,80 +156,43 @@ __kernel void m10800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -238,10 +201,10 @@ __kernel void m10800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -264,10 +227,10 @@ __kernel void m10800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* SHA384
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
@ -332,43 +295,20 @@ __kernel void m10800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -385,39 +325,25 @@ __kernel void m10800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -426,10 +352,10 @@ __kernel void m10800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -452,10 +378,10 @@ __kernel void m10800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* SHA384
|
||||
*/
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = swap32 (w0[0]);
|
||||
w0_t[1] = swap32 (w0[1]);
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m11000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,41 +36,20 @@ __kernel void m11000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -107,43 +86,35 @@ __kernel void m11000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -164,7 +135,7 @@ __kernel void m11000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
/**
|
||||
* prepend salt
|
||||
@ -172,10 +143,10 @@ __kernel void m11000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
// first step fixed 56 bytes of salt
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
@ -203,10 +174,10 @@ __kernel void m11000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
// first transform
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -377,13 +348,7 @@ __kernel void m11000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -411,41 +376,20 @@ __kernel void m11000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -494,43 +438,35 @@ __kernel void m11000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32 wordr1[4];
|
||||
u32 wordr2[4];
|
||||
u32 wordr3[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32 w1[4];
|
||||
u32 w2[4];
|
||||
u32 w3[4];
|
||||
u32x w0[4];
|
||||
u32x w1[4];
|
||||
u32x w2[4];
|
||||
u32x w3[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
@ -551,7 +487,7 @@ __kernel void m11000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
append_0x80_4x4 (w0, w1, w2, w3, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
/**
|
||||
* prepend salt
|
||||
@ -559,10 +495,10 @@ __kernel void m11000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
// first step fixed 56 bytes of salt
|
||||
|
||||
u32 w0_t[4];
|
||||
u32 w1_t[4];
|
||||
u32 w2_t[4];
|
||||
u32 w3_t[4];
|
||||
u32x w0_t[4];
|
||||
u32x w1_t[4];
|
||||
u32x w2_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
@ -590,10 +526,10 @@ __kernel void m11000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
// first transform
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -764,13 +700,7 @@ __kernel void m11000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -55,41 +55,20 @@ __kernel void m11100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* challenge
|
||||
*/
|
||||
@ -122,67 +101,53 @@ __kernel void m11100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
w0_t[2] = salt_buf0[2];
|
||||
w0_t[3] = salt_buf0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = salt_buf1[0];
|
||||
w1_t[1] = salt_buf1[1];
|
||||
w1_t[2] = salt_buf1[2];
|
||||
w1_t[3] = salt_buf1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -195,7 +160,7 @@ __kernel void m11100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
w0_t[0] |= wordl0[0] | wordr0[0];
|
||||
w0_t[1] |= wordl0[1] | wordr0[1];
|
||||
@ -221,10 +186,10 @@ __kernel void m11100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5 ($pass.$salt)
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -404,13 +369,7 @@ __kernel void m11100_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -455,41 +414,20 @@ __kernel void m11100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* challenge
|
||||
*/
|
||||
@ -534,67 +472,53 @@ __kernel void m11100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = salt_buf0[0];
|
||||
w0_t[1] = salt_buf0[1];
|
||||
w0_t[2] = salt_buf0[2];
|
||||
w0_t[3] = salt_buf0[3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = salt_buf1[0];
|
||||
w1_t[1] = salt_buf1[1];
|
||||
w1_t[2] = salt_buf1[2];
|
||||
w1_t[3] = salt_buf1[3];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = 0;
|
||||
w2_t[1] = 0;
|
||||
w2_t[2] = 0;
|
||||
w2_t[3] = 0;
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = 0;
|
||||
w3_t[1] = 0;
|
||||
@ -607,7 +531,7 @@ __kernel void m11100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, pw_len);
|
||||
|
||||
const u32 pw_salt_len = pw_len + salt_len;
|
||||
const u32x pw_salt_len = pw_len + salt_len;
|
||||
|
||||
w0_t[0] |= wordl0[0] | wordr0[0];
|
||||
w0_t[1] |= wordl0[1] | wordr0[1];
|
||||
@ -633,10 +557,10 @@ __kernel void m11100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* md5 ($pass.$salt)
|
||||
*/
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -816,13 +740,7 @@ __kernel void m11100_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31);
|
||||
MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32);
|
||||
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33);
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__kernel void m11200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
|
||||
{
|
||||
@ -36,43 +36,20 @@ __kernel void m11200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -89,39 +66,25 @@ __kernel void m11200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -130,28 +93,28 @@ __kernel void m11200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -162,28 +125,28 @@ __kernel void m11200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1 ($pass)
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -551,13 +514,7 @@ __kernel void m11200_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
c ^= plain_sha1_c;
|
||||
d ^= plain_sha1_d;
|
||||
e ^= plain_sha1_e;
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -585,43 +542,20 @@ __kernel void m11200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -650,39 +584,25 @@ __kernel void m11200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -691,28 +611,28 @@ __kernel void m11200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -723,28 +643,28 @@ __kernel void m11200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1 ($pass)
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -1112,13 +1032,7 @@ __kernel void m11200_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
c ^= plain_sha1_c;
|
||||
d ^= plain_sha1_d;
|
||||
e ^= plain_sha1_e;
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = e;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _MD5_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_lower8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -776,43 +776,20 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -927,67 +904,53 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -1044,28 +1007,28 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len);
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = block0[ 0];
|
||||
w0_t[1] = block0[ 1];
|
||||
w0_t[2] = block0[ 2];
|
||||
w0_t[3] = block0[ 3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = block0[ 4];
|
||||
w1_t[1] = block0[ 5];
|
||||
w1_t[2] = block0[ 6];
|
||||
w1_t[3] = block0[ 7];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = block0[ 8];
|
||||
w2_t[1] = block0[ 9];
|
||||
w2_t[2] = block0[10];
|
||||
w2_t[3] = block0[11];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = block0[12];
|
||||
w3_t[1] = block0[13];
|
||||
@ -1079,10 +1042,10 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
// md5
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -1584,13 +1547,7 @@ __kernel void m11400_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1635,43 +1592,20 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -1798,67 +1732,53 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -1915,28 +1835,28 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len);
|
||||
|
||||
u32 w0_t[4];
|
||||
u32x w0_t[4];
|
||||
|
||||
w0_t[0] = block0[ 0];
|
||||
w0_t[1] = block0[ 1];
|
||||
w0_t[2] = block0[ 2];
|
||||
w0_t[3] = block0[ 3];
|
||||
|
||||
u32 w1_t[4];
|
||||
u32x w1_t[4];
|
||||
|
||||
w1_t[0] = block0[ 4];
|
||||
w1_t[1] = block0[ 5];
|
||||
w1_t[2] = block0[ 6];
|
||||
w1_t[3] = block0[ 7];
|
||||
|
||||
u32 w2_t[4];
|
||||
u32x w2_t[4];
|
||||
|
||||
w2_t[0] = block0[ 8];
|
||||
w2_t[1] = block0[ 9];
|
||||
w2_t[2] = block0[10];
|
||||
w2_t[3] = block0[11];
|
||||
|
||||
u32 w3_t[4];
|
||||
u32x w3_t[4];
|
||||
|
||||
w3_t[0] = block0[12];
|
||||
w3_t[1] = block0[13];
|
||||
@ -1950,10 +1870,10 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
// md5
|
||||
|
||||
u32 a = MD5M_A;
|
||||
u32 b = MD5M_B;
|
||||
u32 c = MD5M_C;
|
||||
u32 d = MD5M_D;
|
||||
u32x a = MD5M_A;
|
||||
u32x b = MD5M_B;
|
||||
u32x c = MD5M_C;
|
||||
u32x d = MD5M_D;
|
||||
|
||||
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00);
|
||||
MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01);
|
||||
@ -2455,13 +2375,7 @@ __kernel void m11400_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
b += r_b;
|
||||
c += r_c;
|
||||
d += r_d;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = d;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = b;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (a, d, c, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _CRC32_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
__constant u32 crc32tab[0x100] =
|
||||
{
|
||||
@ -143,85 +143,50 @@ __kernel void m11500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w_t[16];
|
||||
u32x w_t[16];
|
||||
|
||||
w_t[ 0] = wordl0[0] | wordr0[0];
|
||||
w_t[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -240,8 +205,8 @@ __kernel void m11500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w_t[14] = wordl3[2] | wordr3[2];
|
||||
w_t[15] = 0;
|
||||
|
||||
u32 a = crc32 (w_t, pw_len, iv);
|
||||
u32 b = 0;
|
||||
u32x a = crc32 (w_t, pw_len, iv);
|
||||
u32x b = 0;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
@ -276,41 +241,20 @@ __kernel void m11500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
|
||||
if (gid >= gid_max) return;
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* digest
|
||||
*/
|
||||
@ -329,46 +273,32 @@ __kernel void m11500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w_t[16];
|
||||
u32x w_t[16];
|
||||
|
||||
w_t[ 0] = wordl0[0] | wordr0[0];
|
||||
w_t[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -387,8 +317,8 @@ __kernel void m11500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
w_t[14] = wordl3[2] | wordr3[2];
|
||||
w_t[15] = 0;
|
||||
|
||||
u32 a = crc32 (w_t, pw_len, iv);
|
||||
u32 b = 0;
|
||||
u32x a = crc32 (w_t, pw_len, iv);
|
||||
u32x b = 0;
|
||||
|
||||
const u32 r0 = a;
|
||||
const u32 r1 = b;
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _GOST2012_256_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define INITVAL 0x0101010101010101
|
||||
|
||||
@ -2320,87 +2320,50 @@ __kernel void m11700_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -2539,87 +2502,50 @@ __kernel void m11700_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#define _GOST2012_512_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -18,9 +20,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define INITVAL 0
|
||||
|
||||
@ -2320,87 +2320,50 @@ __kernel void m11800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
@ -2539,87 +2502,50 @@ __kernel void m11800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w[16];
|
||||
u32x w[16];
|
||||
|
||||
w[ 0] = wordl0[0] | wordr0[0];
|
||||
w[ 1] = wordl0[1] | wordr0[1];
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#define _SHA256_SHA1_
|
||||
|
||||
#define NEW_SIMD_CODE
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "include/kernel_vendor.h"
|
||||
|
||||
@ -16,9 +18,7 @@
|
||||
#include "include/kernel_functions.c"
|
||||
#include "OpenCL/types_ocl.c"
|
||||
#include "OpenCL/common.c"
|
||||
|
||||
#define COMPARE_S "OpenCL/check_single_comp4.c"
|
||||
#define COMPARE_M "OpenCL/check_multi_comp4.c"
|
||||
#include "OpenCL/simd.c"
|
||||
|
||||
#define uint_to_hex_upper8(i) l_bin2asc[(i)]
|
||||
|
||||
@ -55,43 +55,20 @@ __kernel void m12600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -111,39 +88,25 @@ __kernel void m12600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -152,28 +115,28 @@ __kernel void m12600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -184,31 +147,31 @@ __kernel void m12600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32 f = 0;
|
||||
u32 g = 0;
|
||||
u32 h = 0;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
u32x f = 0;
|
||||
u32x g = 0;
|
||||
u32x h = 0;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -431,13 +394,7 @@ __kernel void m12600_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_M
|
||||
COMPARE_M_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
@ -482,43 +439,20 @@ __kernel void m12600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* base
|
||||
*/
|
||||
|
||||
u32 wordl0[4];
|
||||
u32 pws0[4] = { 0 };
|
||||
u32 pws1[4] = { 0 };
|
||||
|
||||
wordl0[0] = pws[gid].i[ 0];
|
||||
wordl0[1] = pws[gid].i[ 1];
|
||||
wordl0[2] = pws[gid].i[ 2];
|
||||
wordl0[3] = pws[gid].i[ 3];
|
||||
|
||||
u32 wordl1[4];
|
||||
|
||||
wordl1[0] = pws[gid].i[ 4];
|
||||
wordl1[1] = pws[gid].i[ 5];
|
||||
wordl1[2] = pws[gid].i[ 6];
|
||||
wordl1[3] = pws[gid].i[ 7];
|
||||
|
||||
u32 wordl2[4];
|
||||
|
||||
wordl2[0] = 0;
|
||||
wordl2[1] = 0;
|
||||
wordl2[2] = 0;
|
||||
wordl2[3] = 0;
|
||||
|
||||
u32 wordl3[4];
|
||||
|
||||
wordl3[0] = 0;
|
||||
wordl3[1] = 0;
|
||||
wordl3[2] = 0;
|
||||
wordl3[3] = 0;
|
||||
pws0[0] = pws[gid].i[0];
|
||||
pws0[1] = pws[gid].i[1];
|
||||
pws0[2] = pws[gid].i[2];
|
||||
pws0[3] = pws[gid].i[3];
|
||||
pws1[0] = pws[gid].i[4];
|
||||
pws1[1] = pws[gid].i[5];
|
||||
pws1[2] = pws[gid].i[6];
|
||||
pws1[3] = pws[gid].i[7];
|
||||
|
||||
const u32 pw_l_len = pws[gid].pw_len;
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
|
||||
{
|
||||
append_0x80_2x4 (wordl0, wordl1, pw_l_len);
|
||||
|
||||
switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len);
|
||||
}
|
||||
|
||||
/**
|
||||
* salt
|
||||
*/
|
||||
@ -550,39 +484,25 @@ __kernel void m12600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* loop
|
||||
*/
|
||||
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++)
|
||||
for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE)
|
||||
{
|
||||
const u32 pw_r_len = combs_buf[il_pos].pw_len;
|
||||
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
|
||||
|
||||
const u32 pw_len = pw_l_len + pw_r_len;
|
||||
const u32x pw_len = pw_l_len + pw_r_len;
|
||||
|
||||
u32 wordr0[4];
|
||||
u32x wordr0[4] = { 0 };
|
||||
u32x wordr1[4] = { 0 };
|
||||
u32x wordr2[4] = { 0 };
|
||||
u32x wordr3[4] = { 0 };
|
||||
|
||||
wordr0[0] = combs_buf[il_pos].i[0];
|
||||
wordr0[1] = combs_buf[il_pos].i[1];
|
||||
wordr0[2] = combs_buf[il_pos].i[2];
|
||||
wordr0[3] = combs_buf[il_pos].i[3];
|
||||
|
||||
u32 wordr1[4];
|
||||
|
||||
wordr1[0] = combs_buf[il_pos].i[4];
|
||||
wordr1[1] = combs_buf[il_pos].i[5];
|
||||
wordr1[2] = combs_buf[il_pos].i[6];
|
||||
wordr1[3] = combs_buf[il_pos].i[7];
|
||||
|
||||
u32 wordr2[4];
|
||||
|
||||
wordr2[0] = 0;
|
||||
wordr2[1] = 0;
|
||||
wordr2[2] = 0;
|
||||
wordr2[3] = 0;
|
||||
|
||||
u32 wordr3[4];
|
||||
|
||||
wordr3[0] = 0;
|
||||
wordr3[1] = 0;
|
||||
wordr3[2] = 0;
|
||||
wordr3[3] = 0;
|
||||
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
|
||||
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
|
||||
wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
|
||||
wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
|
||||
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
|
||||
wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
|
||||
wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
|
||||
wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
|
||||
|
||||
if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
|
||||
{
|
||||
@ -591,28 +511,28 @@ __kernel void m12600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len);
|
||||
}
|
||||
|
||||
u32 w0[4];
|
||||
u32x w0[4];
|
||||
|
||||
w0[0] = wordl0[0] | wordr0[0];
|
||||
w0[1] = wordl0[1] | wordr0[1];
|
||||
w0[2] = wordl0[2] | wordr0[2];
|
||||
w0[3] = wordl0[3] | wordr0[3];
|
||||
|
||||
u32 w1[4];
|
||||
u32x w1[4];
|
||||
|
||||
w1[0] = wordl1[0] | wordr1[0];
|
||||
w1[1] = wordl1[1] | wordr1[1];
|
||||
w1[2] = wordl1[2] | wordr1[2];
|
||||
w1[3] = wordl1[3] | wordr1[3];
|
||||
|
||||
u32 w2[4];
|
||||
u32x w2[4];
|
||||
|
||||
w2[0] = wordl2[0] | wordr2[0];
|
||||
w2[1] = wordl2[1] | wordr2[1];
|
||||
w2[2] = wordl2[2] | wordr2[2];
|
||||
w2[3] = wordl2[3] | wordr2[3];
|
||||
|
||||
u32 w3[4];
|
||||
u32x w3[4];
|
||||
|
||||
w3[0] = wordl3[0] | wordr3[0];
|
||||
w3[1] = wordl3[1] | wordr3[1];
|
||||
@ -623,31 +543,31 @@ __kernel void m12600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
* sha1
|
||||
*/
|
||||
|
||||
u32 w0_t = swap32 (w0[0]);
|
||||
u32 w1_t = swap32 (w0[1]);
|
||||
u32 w2_t = swap32 (w0[2]);
|
||||
u32 w3_t = swap32 (w0[3]);
|
||||
u32 w4_t = swap32 (w1[0]);
|
||||
u32 w5_t = swap32 (w1[1]);
|
||||
u32 w6_t = swap32 (w1[2]);
|
||||
u32 w7_t = swap32 (w1[3]);
|
||||
u32 w8_t = swap32 (w2[0]);
|
||||
u32 w9_t = swap32 (w2[1]);
|
||||
u32 wa_t = swap32 (w2[2]);
|
||||
u32 wb_t = swap32 (w2[3]);
|
||||
u32 wc_t = swap32 (w3[0]);
|
||||
u32 wd_t = swap32 (w3[1]);
|
||||
u32 we_t = 0;
|
||||
u32 wf_t = pw_len * 8;
|
||||
u32x w0_t = swap32 (w0[0]);
|
||||
u32x w1_t = swap32 (w0[1]);
|
||||
u32x w2_t = swap32 (w0[2]);
|
||||
u32x w3_t = swap32 (w0[3]);
|
||||
u32x w4_t = swap32 (w1[0]);
|
||||
u32x w5_t = swap32 (w1[1]);
|
||||
u32x w6_t = swap32 (w1[2]);
|
||||
u32x w7_t = swap32 (w1[3]);
|
||||
u32x w8_t = swap32 (w2[0]);
|
||||
u32x w9_t = swap32 (w2[1]);
|
||||
u32x wa_t = swap32 (w2[2]);
|
||||
u32x wb_t = swap32 (w2[3]);
|
||||
u32x wc_t = swap32 (w3[0]);
|
||||
u32x wd_t = swap32 (w3[1]);
|
||||
u32x we_t = 0;
|
||||
u32x wf_t = pw_len * 8;
|
||||
|
||||
u32 a = SHA1M_A;
|
||||
u32 b = SHA1M_B;
|
||||
u32 c = SHA1M_C;
|
||||
u32 d = SHA1M_D;
|
||||
u32 e = SHA1M_E;
|
||||
u32 f = 0;
|
||||
u32 g = 0;
|
||||
u32 h = 0;
|
||||
u32x a = SHA1M_A;
|
||||
u32x b = SHA1M_B;
|
||||
u32x c = SHA1M_C;
|
||||
u32x d = SHA1M_D;
|
||||
u32x e = SHA1M_E;
|
||||
u32x f = 0;
|
||||
u32x g = 0;
|
||||
u32x h = 0;
|
||||
|
||||
#undef K
|
||||
#define K SHA1C00
|
||||
@ -870,13 +790,7 @@ __kernel void m12600_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
|
||||
wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d);
|
||||
we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e);
|
||||
wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f);
|
||||
|
||||
const u32 r0 = d;
|
||||
const u32 r1 = h;
|
||||
const u32 r2 = c;
|
||||
const u32 r3 = g;
|
||||
|
||||
#include COMPARE_S
|
||||
COMPARE_S_SIMD (d, h, c, g);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4494,6 +4494,8 @@ static void *thread_calc_stdin (void *p)
|
||||
run_cracker (device_param, pws_cnt);
|
||||
|
||||
device_param->pws_cnt = 0;
|
||||
|
||||
memset (device_param->pws_buf, 0, device_param->size_pws);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4753,6 +4755,8 @@ static void *thread_calc (void *p)
|
||||
run_cracker (device_param, pws_cnt);
|
||||
|
||||
device_param->pws_cnt = 0;
|
||||
|
||||
memset (device_param->pws_buf, 0, device_param->size_pws);
|
||||
}
|
||||
|
||||
if (data.devices_status == STATUS_STOP_AT_CHECKPOINT) check_checkpoint ();
|
||||
|
Loading…
Reference in New Issue
Block a user