Refactor some u32x to u32 where u32x is not needed

pull/1373/head
jsteube 7 years ago
parent 35a24df55e
commit 5e01ff4c53

@ -342,9 +342,9 @@ __constant u32a c_skb[8][64] =
#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
void _des_crypt_keysetup (u32 c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
{
u32x tt;
u32 tt;
PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
HPERM_OP (c, tt, 2, 0xcccc0000);
@ -379,45 +379,45 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
d = d & 0x0fffffff;
const u32x c00 = (c >> 0) & 0x0000003f;
const u32x c06 = (c >> 6) & 0x00383003;
const u32x c07 = (c >> 7) & 0x0000003c;
const u32x c13 = (c >> 13) & 0x0000060f;
const u32x c20 = (c >> 20) & 0x00000001;
u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32x d00 = (d >> 0) & 0x00003c3f;
const u32x d07 = (d >> 7) & 0x00003f03;
const u32x d21 = (d >> 21) & 0x0000000f;
const u32x d22 = (d >> 22) & 0x00000030;
u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
const u32 c00 = (c >> 0) & 0x0000003f;
const u32 c06 = (c >> 6) & 0x00383003;
const u32 c07 = (c >> 7) & 0x0000003c;
const u32 c13 = (c >> 13) & 0x0000060f;
const u32 c20 = (c >> 20) & 0x00000001;
u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32 d00 = (d >> 0) & 0x00003c3f;
const u32 d07 = (d >> 7) & 0x00003f03;
const u32 d21 = (d >> 21) & 0x0000000f;
const u32 d22 = (d >> 22) & 0x00000030;
u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
Kc[i] = ((t << 16) | (s & 0x0000ffff));
Kd[i] = ((s >> 16) | (t & 0xffff0000));
}
}
void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
void _des_crypt_encrypt (u32 iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
const u32 E1 = (mask >> 2) & 0x3f0;
const u32 E0 = mask & 0x3f;
u32x r = 0;
u32x l = 0;
u32 r = 0;
u32 l = 0;
for (u32 i = 0; i < 25; i++)
{
@ -426,8 +426,8 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
#endif
for (u32 j = 0; j < 16; j += 2)
{
u32x t;
u32x u;
u32 t;
u32 u;
t = r ^ (r >> 16);
u = t & E0;
@ -470,7 +470,7 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
| BOX (((t >> 24) & 0x3f), 7, s_SPtrans);
}
u32x tt;
u32 tt;
tt = l;
l = r;
@ -527,19 +527,7 @@ __kernel void m01500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* salt
@ -553,28 +541,25 @@ __kernel void m01500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32x data[2];
u32 data[2];
data[0] = (w0[0] << 1) & 0xfefefefe;
data[1] = (w0[1] << 1) & 0xfefefefe;
data[0] = (tmp.i[0] << 1) & 0xfefefefe;
data[1] = (tmp.i[1] << 1) & 0xfefefefe;
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb);
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_M_SIMD (iv[0], iv[1], z, z);
}
@ -626,19 +611,7 @@ __kernel void m01500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* salt
@ -664,28 +637,25 @@ __kernel void m01500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32x data[2];
u32 data[2];
data[0] = (w0[0] << 1) & 0xfefefefe;
data[1] = (w0[1] << 1) & 0xfefefefe;
data[0] = (tmp.i[0] << 1) & 0xfefefefe;
data[1] = (tmp.i[1] << 1) & 0xfefefefe;
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb);
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_S_SIMD (iv[0], iv[1], z, z);
}

@ -340,9 +340,9 @@ __constant u32a c_skb[8][64] =
#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
void _des_crypt_keysetup (u32 c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
{
u32x tt;
u32 tt;
PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
HPERM_OP (c, tt, 2, 0xcccc0000);
@ -377,45 +377,45 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
d = d & 0x0fffffff;
const u32x c00 = (c >> 0) & 0x0000003f;
const u32x c06 = (c >> 6) & 0x00383003;
const u32x c07 = (c >> 7) & 0x0000003c;
const u32x c13 = (c >> 13) & 0x0000060f;
const u32x c20 = (c >> 20) & 0x00000001;
u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32x d00 = (d >> 0) & 0x00003c3f;
const u32x d07 = (d >> 7) & 0x00003f03;
const u32x d21 = (d >> 21) & 0x0000000f;
const u32x d22 = (d >> 22) & 0x00000030;
u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
const u32 c00 = (c >> 0) & 0x0000003f;
const u32 c06 = (c >> 6) & 0x00383003;
const u32 c07 = (c >> 7) & 0x0000003c;
const u32 c13 = (c >> 13) & 0x0000060f;
const u32 c20 = (c >> 20) & 0x00000001;
u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32 d00 = (d >> 0) & 0x00003c3f;
const u32 d07 = (d >> 7) & 0x00003f03;
const u32 d21 = (d >> 21) & 0x0000000f;
const u32 d22 = (d >> 22) & 0x00000030;
u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
Kc[i] = ((t << 16) | (s & 0x0000ffff));
Kd[i] = ((s >> 16) | (t & 0xffff0000));
}
}
void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
void _des_crypt_encrypt (u32 iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
const u32 E1 = (mask >> 2) & 0x3f0;
const u32 E0 = mask & 0x3f;
u32x r = 0;
u32x l = 0;
u32 r = 0;
u32 l = 0;
for (u32 i = 0; i < 25; i++)
{
@ -424,8 +424,8 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
#endif
for (u32 j = 0; j < 16; j += 2)
{
u32x t;
u32x u;
u32 t;
u32 u;
t = r ^ (r >> 16);
u = t & E0;
@ -468,7 +468,7 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
| BOX (((t >> 24) & 0x3f), 7, s_SPtrans);
}
u32x tt;
u32 tt;
tt = l;
l = r;
@ -551,18 +551,18 @@ __kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -573,10 +573,10 @@ __kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -596,10 +596,10 @@ __kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -622,21 +622,21 @@ __kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
* DES
*/
u32x data[2];
u32 data[2];
data[0] = (w0[0] << 1) & 0xfefefefe;
data[1] = (w0[1] << 1) & 0xfefefefe;
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb);
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_M_SIMD (iv[0], iv[1], z, z);
}
@ -726,18 +726,18 @@ __kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -748,10 +748,10 @@ __kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -771,10 +771,10 @@ __kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -797,21 +797,21 @@ __kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
* DES
*/
u32x data[2];
u32 data[2];
data[0] = (w0[0] << 1) & 0xfefefefe;
data[1] = (w0[1] << 1) & 0xfefefefe;
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb);
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_S_SIMD (iv[0], iv[1], z, z);
}

@ -345,18 +345,18 @@ __constant u32a c_skb[8][64] =
#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
void _des_crypt_encrypt (u32 iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
u32x r = data[0];
u32x l = data[1];
u32 r = data[0];
u32 l = data[1];
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
u32x t;
u32 u;
u32 t;
u = Kc[i + 0] ^ rotl32 (r, 30u);
t = Kd[i + 0] ^ rotl32 (r, 26u);
@ -387,9 +387,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
iv[1] = rotl32 (r, 29);
}
void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
void _des_crypt_keysetup (u32 c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
{
u32x tt;
u32 tt;
PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
HPERM_OP (c, tt, 2, 0xcccc0000);
@ -424,41 +424,41 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
d = d & 0x0fffffff;
const u32x c00 = (c >> 0) & 0x0000003f;
const u32x c06 = (c >> 6) & 0x00383003;
const u32x c07 = (c >> 7) & 0x0000003c;
const u32x c13 = (c >> 13) & 0x0000060f;
const u32x c20 = (c >> 20) & 0x00000001;
u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32x d00 = (d >> 0) & 0x00003c3f;
const u32x d07 = (d >> 7) & 0x00003f03;
const u32x d21 = (d >> 21) & 0x0000000f;
const u32x d22 = (d >> 22) & 0x00000030;
u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
const u32 c00 = (c >> 0) & 0x0000003f;
const u32 c06 = (c >> 6) & 0x00383003;
const u32 c07 = (c >> 7) & 0x0000003c;
const u32 c13 = (c >> 13) & 0x0000060f;
const u32 c20 = (c >> 20) & 0x00000001;
u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32 d00 = (d >> 0) & 0x00003c3f;
const u32 d07 = (d >> 7) & 0x00003f03;
const u32 d21 = (d >> 21) & 0x0000000f;
const u32 d22 = (d >> 22) & 0x00000030;
u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
Kc[i] = ((t << 16) | (s & 0x0000ffff));
Kd[i] = ((s >> 16) | (t & 0xffff0000));
}
}
void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2])
void transform_netntlmv1_key (const u32 w0, const u32x w1, u32x out[2])
{
u32x t[8];
u32 t[8];
t[0] = (w0 >> 0) & 0xff;
t[1] = (w0 >> 8) & 0xff;
@ -469,7 +469,7 @@ void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2])
t[6] = (w1 >> 16) & 0xff;
t[7] = (w1 >> 24) & 0xff;
u32x k[8];
u32 k[8];
k[0] = (t[0] >> 0);
k[1] = (t[0] << 7) | (t[1] >> 1);
@ -537,19 +537,7 @@ __kernel void m03000_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* main
@ -557,35 +545,32 @@ __kernel void m03000_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32x key[2];
u32 key[2];
transform_netntlmv1_key (w0[0], w0[1], key);
// Pass both password words: the helper unpacks key bytes from w0 and from w1,
// so the second argument must be tmp.i[1] (the replaced call used w0[0], w0[1]).
transform_netntlmv1_key (tmp.i[0], tmp.i[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = LM_IV_0_IP_RR3;
data[1] = LM_IV_1_IP_RR3;
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_M_SIMD (iv[0], iv[1], z, z);
}
@ -637,19 +622,7 @@ __kernel void m03000_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[0];
pw_buf0[1] = pws[gid].i[1];
pw_buf0[2] = pws[gid].i[2];
pw_buf0[3] = pws[gid].i[3];
pw_buf1[0] = pws[gid].i[4];
pw_buf1[1] = pws[gid].i[5];
pw_buf1[2] = pws[gid].i[6];
pw_buf1[3] = pws[gid].i[7];
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* digest
@ -669,35 +642,32 @@ __kernel void m03000_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32x key[2];
u32 key[2];
transform_netntlmv1_key (w0[0], w0[1], key);
// Pass both password words: the helper unpacks key bytes from w0 and from w1,
// so the second argument must be tmp.i[1] (the replaced call used w0[0], w0[1]).
transform_netntlmv1_key (tmp.i[0], tmp.i[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = LM_IV_0_IP_RR3;
data[1] = LM_IV_1_IP_RR3;
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_S_SIMD (iv[0], iv[1], z, z);
}

@ -343,18 +343,18 @@ __constant u32a c_skb[8][64] =
#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
void _des_crypt_encrypt (u32 iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
u32x r = data[0];
u32x l = data[1];
u32 r = data[0];
u32 l = data[1];
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
u32x t;
u32 u;
u32 t;
u = Kc[i + 0] ^ rotl32 (r, 30u);
t = Kd[i + 0] ^ rotl32 (r, 26u);
@ -385,9 +385,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
iv[1] = rotl32 (r, 29);
}
void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
void _des_crypt_keysetup (u32 c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
{
u32x tt;
u32 tt;
PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
HPERM_OP (c, tt, 2, 0xcccc0000);
@ -422,41 +422,41 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
d = d & 0x0fffffff;
const u32x c00 = (c >> 0) & 0x0000003f;
const u32x c06 = (c >> 6) & 0x00383003;
const u32x c07 = (c >> 7) & 0x0000003c;
const u32x c13 = (c >> 13) & 0x0000060f;
const u32x c20 = (c >> 20) & 0x00000001;
u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32x d00 = (d >> 0) & 0x00003c3f;
const u32x d07 = (d >> 7) & 0x00003f03;
const u32x d21 = (d >> 21) & 0x0000000f;
const u32x d22 = (d >> 22) & 0x00000030;
u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
const u32 c00 = (c >> 0) & 0x0000003f;
const u32 c06 = (c >> 6) & 0x00383003;
const u32 c07 = (c >> 7) & 0x0000003c;
const u32 c13 = (c >> 13) & 0x0000060f;
const u32 c20 = (c >> 20) & 0x00000001;
u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32 d00 = (d >> 0) & 0x00003c3f;
const u32 d07 = (d >> 7) & 0x00003f03;
const u32 d21 = (d >> 21) & 0x0000000f;
const u32 d22 = (d >> 22) & 0x00000030;
u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
Kc[i] = ((t << 16) | (s & 0x0000ffff));
Kd[i] = ((s >> 16) | (t & 0xffff0000));
}
}
void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2])
void transform_netntlmv1_key (const u32 w0, const u32x w1, u32x out[2])
{
u32x t[8];
u32 t[8];
t[0] = (w0 >> 0) & 0xff;
t[1] = (w0 >> 8) & 0xff;
@ -467,7 +467,7 @@ void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2])
t[6] = (w1 >> 16) & 0xff;
t[7] = (w1 >> 24) & 0xff;
u32x k[8];
u32 k[8];
k[0] = (t[0] >> 0);
k[1] = (t[0] << 7) | (t[1] >> 1);
@ -555,18 +555,18 @@ __kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -577,10 +577,10 @@ __kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -600,10 +600,10 @@ __kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -626,28 +626,28 @@ __kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
* DES
*/
u32x key[2];
u32 key[2];
transform_netntlmv1_key (w0[0], w0[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = LM_IV_0_IP_RR3;
data[1] = LM_IV_1_IP_RR3;
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_M_SIMD (iv[0], iv[1], z, z);
}
@ -731,18 +731,18 @@ __kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -753,10 +753,10 @@ __kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -776,10 +776,10 @@ __kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -802,28 +802,28 @@ __kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
* DES
*/
u32x key[2];
u32 key[2];
transform_netntlmv1_key (w0[0], w0[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = LM_IV_0_IP_RR3;
data[1] = LM_IV_1_IP_RR3;
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_S_SIMD (iv[0], iv[1], z, z);
}

@ -394,18 +394,18 @@ __constant u32a c_skb[8][64] =
#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
#endif
void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
void _des_crypt_encrypt (u32 iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
u32x r = data[0];
u32x l = data[1];
u32 r = data[0];
u32 l = data[1];
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
u32x t;
u32 u;
u32 t;
u = Kc[i + 0] ^ r;
t = Kd[i + 0] ^ rotl32 (r, 28u);
@ -436,9 +436,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
iv[1] = r;
}
void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
void _des_crypt_keysetup (u32 c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
{
u32x tt;
u32 tt;
PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
HPERM_OP (c, tt, 2, 0xcccc0000);
@ -473,32 +473,32 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
d = d & 0x0fffffff;
const u32x c00 = (c >> 0) & 0x0000003f;
const u32x c06 = (c >> 6) & 0x00383003;
const u32x c07 = (c >> 7) & 0x0000003c;
const u32x c13 = (c >> 13) & 0x0000060f;
const u32x c20 = (c >> 20) & 0x00000001;
u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32x d00 = (d >> 0) & 0x00003c3f;
const u32x d07 = (d >> 7) & 0x00003f03;
const u32x d21 = (d >> 21) & 0x0000000f;
const u32x d22 = (d >> 22) & 0x00000030;
u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
const u32 c00 = (c >> 0) & 0x0000003f;
const u32 c06 = (c >> 6) & 0x00383003;
const u32 c07 = (c >> 7) & 0x0000003c;
const u32 c13 = (c >> 13) & 0x0000060f;
const u32 c20 = (c >> 20) & 0x00000001;
u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32 d00 = (d >> 0) & 0x00003c3f;
const u32 d07 = (d >> 7) & 0x00003f03;
const u32 d21 = (d >> 21) & 0x0000000f;
const u32 d22 = (d >> 22) & 0x00000030;
u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
Kc[i] = ((t << 16) | (s & 0x0000ffff));
Kd[i] = ((s >> 16) | (t & 0xffff0000));
@ -508,7 +508,7 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
}
}
void transform_racf_key (const u32x w0, const u32x w1, u32x key[2])
void transform_racf_key (const u32 w0, const u32x w1, u32x key[2])
{
key[0] = BOX1 (((w0 >> 0) & 0xff), c_ascii_to_ebcdic_pc) << 0
| BOX1 (((w0 >> 8) & 0xff), c_ascii_to_ebcdic_pc) << 8
@ -567,19 +567,7 @@ __kernel void m08500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = 0;
pw_buf0[3] = 0;
pw_buf1[0] = 0;
pw_buf1[1] = 0;
pw_buf1[2] = 0;
pw_buf1[3] = 0;
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* salt
@ -596,39 +584,36 @@ __kernel void m08500_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
/**
* RACF
*/
u32x key[2];
u32 key[2];
transform_racf_key (w0[0], w0[1], key);
transform_racf_key (tmp.i[0], tmp.i[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = salt_buf0[0];
data[1] = salt_buf0[1];
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_M_SIMD (iv[0], iv[1], z, z);
}
@ -680,19 +665,7 @@ __kernel void m08500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = 0;
pw_buf0[3] = 0;
pw_buf1[0] = 0;
pw_buf1[1] = 0;
pw_buf1[2] = 0;
pw_buf1[3] = 0;
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* salt
@ -721,39 +694,36 @@ __kernel void m08500_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
/**
* RACF
*/
u32x key[2];
u32 key[2];
transform_racf_key (w0[0], w0[1], key);
transform_racf_key (tmp.i[0], tmp.i[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = salt_buf0[0];
data[1] = salt_buf0[1];
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_S_SIMD (iv[0], iv[1], z, z);
}

@ -392,18 +392,18 @@ __constant u32a c_skb[8][64] =
#define BOX1(i,S) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
#endif
void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
void _des_crypt_encrypt (u32 iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 (*s_SPtrans)[64])
{
u32x r = data[0];
u32x l = data[1];
u32 r = data[0];
u32 l = data[1];
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
u32x t;
u32 u;
u32 t;
u = Kc[i + 0] ^ r;
t = Kd[i + 0] ^ rotl32 (r, 28u);
@ -434,9 +434,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
iv[1] = r;
}
void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
void _des_crypt_keysetup (u32 c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 (*s_skb)[64])
{
u32x tt;
u32 tt;
PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
HPERM_OP (c, tt, 2, 0xcccc0000);
@ -471,32 +471,32 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
d = d & 0x0fffffff;
const u32x c00 = (c >> 0) & 0x0000003f;
const u32x c06 = (c >> 6) & 0x00383003;
const u32x c07 = (c >> 7) & 0x0000003c;
const u32x c13 = (c >> 13) & 0x0000060f;
const u32x c20 = (c >> 20) & 0x00000001;
u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32x d00 = (d >> 0) & 0x00003c3f;
const u32x d07 = (d >> 7) & 0x00003f03;
const u32x d21 = (d >> 21) & 0x0000000f;
const u32x d22 = (d >> 22) & 0x00000030;
u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
const u32 c00 = (c >> 0) & 0x0000003f;
const u32 c06 = (c >> 6) & 0x00383003;
const u32 c07 = (c >> 7) & 0x0000003c;
const u32 c13 = (c >> 13) & 0x0000060f;
const u32 c20 = (c >> 20) & 0x00000001;
u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32 d00 = (d >> 0) & 0x00003c3f;
const u32 d07 = (d >> 7) & 0x00003f03;
const u32 d21 = (d >> 21) & 0x0000000f;
const u32 d22 = (d >> 22) & 0x00000030;
u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
Kc[i] = ((t << 16) | (s & 0x0000ffff));
Kd[i] = ((s >> 16) | (t & 0xffff0000));
@ -506,7 +506,7 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
}
}
void transform_racf_key (const u32x w0, const u32x w1, u32x key[2])
void transform_racf_key (const u32 w0, const u32x w1, u32x key[2])
{
key[0] = BOX1 (((w0 >> 0) & 0xff), c_ascii_to_ebcdic_pc) << 0
| BOX1 (((w0 >> 8) & 0xff), c_ascii_to_ebcdic_pc) << 8
@ -594,18 +594,18 @@ __kernel void m08500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -616,10 +616,10 @@ __kernel void m08500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -639,7 +639,7 @@ __kernel void m08500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[2];
u32 w0[2];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -648,28 +648,28 @@ __kernel void m08500_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
* RACF
*/
u32x key[2];
u32 key[2];
transform_racf_key (w0[0], w0[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = salt_buf0[0];
data[1] = salt_buf0[1];
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_M_SIMD (iv[0], iv[1], z, z);
}
@ -762,18 +762,18 @@ __kernel void m08500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -784,10 +784,10 @@ __kernel void m08500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -807,7 +807,7 @@ __kernel void m08500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[2];
u32 w0[2];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -816,28 +816,28 @@ __kernel void m08500_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
* RACF
*/
u32x key[2];
u32 key[2];
transform_racf_key (w0[0], w0[1], key);
const u32x c = key[0];
const u32x d = key[1];
const u32 c = key[0];
const u32 d = key[1];
u32x Kc[16];
u32x Kd[16];
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (c, d, Kc, Kd, s_skb);
u32x data[2];
u32 data[2];
data[0] = salt_buf0[0];
data[1] = salt_buf0[1];
u32x iv[2];
u32 iv[2];
_des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans);
u32x z = 0;
u32 z = 0;
COMPARE_S_SIMD (iv[0], iv[1], z, z);
}

@ -63,9 +63,9 @@ __constant u32a lotus_magic_table[256] =
#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
#endif
void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
void lotus_mix (u32 *in, __local u32 *s_lotus_magic_table)
{
u32x p = 0;
u32 p = 0;
for (int i = 0; i < 18; i++)
{
@ -73,8 +73,8 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
u32x tmp_out = 0;
u32 tmp_in = in[j];
u32 tmp_out = 0;
p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 0;
p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 8;
@ -86,11 +86,11 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
}
}
void lotus_transform_password (const u32x in[4], u32x out[4], __local u32 *s_lotus_magic_table)
void lotus_transform_password (const u32 in[4], u32x out[4], __local u32 *s_lotus_magic_table)
{
u32x t = out[3] >> 24;
u32 t = out[3] >> 24;
u32x c;
u32 c;
#ifdef _unroll
#pragma unroll
@ -183,9 +183,9 @@ void pad (u32 w[4], const u32 len)
}
}
void mdtransform_norecalc (u32x state[4], const u32x block[4], __local u32 *s_lotus_magic_table)
void mdtransform_norecalc (u32 state[4], const u32x block[4], __local u32 *s_lotus_magic_table)
{
u32x x[12];
u32 x[12];
x[ 0] = state[0];
x[ 1] = state[1];
@ -208,16 +208,16 @@ void mdtransform_norecalc (u32x state[4], const u32x block[4], __local u32 *s_lo
state[3] = x[3];
}
void mdtransform (u32x state[4], u32x checksum[4], const u32x block[4], __local u32 *s_lotus_magic_table)
void mdtransform (u32 state[4], u32x checksum[4], const u32x block[4], __local u32 *s_lotus_magic_table)
{
mdtransform_norecalc (state, block, s_lotus_magic_table);
lotus_transform_password (block, checksum, s_lotus_magic_table);
}
void domino_big_md (const u32x saved_key[4], const u32 size, u32x state[4], __local u32 *s_lotus_magic_table)
void domino_big_md (const u32 saved_key[4], const u32 size, u32x state[4], __local u32 *s_lotus_magic_table)
{
u32x checksum[4];
u32 checksum[4];
checksum[0] = 0;
checksum[1] = 0;
@ -258,19 +258,7 @@ __kernel void m08600_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
pw_buf1[0] = pws[gid].i[ 4];
pw_buf1[1] = pws[gid].i[ 5];
pw_buf1[2] = pws[gid].i[ 6];
pw_buf1[3] = pws[gid].i[ 7];
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* loop
@ -278,18 +266,15 @@ __kernel void m08600_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
/**
* domino
*/
u32x state[4];
u32 state[4];
state[0] = 0;
state[1] = 0;
@ -300,9 +285,9 @@ __kernel void m08600_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* padding
*/
pad (w0, out_len);
pad (tmp.i, out_len);
domino_big_md (w0, out_len, state, s_lotus_magic_table);
domino_big_md (tmp.i, out_len, state, s_lotus_magic_table);
COMPARE_M_SIMD (state[0], state[1], state[2], state[3]);
}
@ -337,19 +322,7 @@ __kernel void m08600_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* base
*/
u32 pw_buf0[4];
u32 pw_buf1[4];
pw_buf0[0] = pws[gid].i[ 0];
pw_buf0[1] = pws[gid].i[ 1];
pw_buf0[2] = pws[gid].i[ 2];
pw_buf0[3] = pws[gid].i[ 3];
pw_buf1[0] = pws[gid].i[ 4];
pw_buf1[1] = pws[gid].i[ 5];
pw_buf1[2] = pws[gid].i[ 6];
pw_buf1[3] = pws[gid].i[ 7];
const u32 pw_len = pws[gid].pw_len;
COPY_PW (pws[gid]);
/**
* digest
@ -369,18 +342,15 @@ __kernel void m08600_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
u32x w0[4] = { 0 };
u32x w1[4] = { 0 };
u32x w2[4] = { 0 };
u32x w3[4] = { 0 };
pw_t tmp = PASTE_PW;
const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
/**
* domino
*/
u32x state[4];
u32 state[4];
state[0] = 0;
state[1] = 0;
@ -391,9 +361,9 @@ __kernel void m08600_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* padding
*/
pad (w0, out_len);
pad (tmp.i, out_len);
domino_big_md (w0, out_len, state, s_lotus_magic_table);
domino_big_md (tmp.i, out_len, state, s_lotus_magic_table);
COMPARE_S_SIMD (state[0], state[1], state[2], state[3]);
}

@ -61,9 +61,9 @@ __constant u32a lotus_magic_table[256] =
#define BOX1(S,i) (u32x) ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3], (S)[(i).s4], (S)[(i).s5], (S)[(i).s6], (S)[(i).s7], (S)[(i).s8], (S)[(i).s9], (S)[(i).sa], (S)[(i).sb], (S)[(i).sc], (S)[(i).sd], (S)[(i).se], (S)[(i).sf])
#endif
void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
void lotus_mix (u32 *in, __local u32 *s_lotus_magic_table)
{
u32x p = 0;
u32 p = 0;
for (int i = 0; i < 18; i++)
{
@ -71,8 +71,8 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
u32x tmp_out = 0;
u32 tmp_in = in[j];
u32 tmp_out = 0;
p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 0;
p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX1 (s_lotus_magic_table, p); tmp_out |= p << 8;
@ -84,11 +84,11 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
}
}
void lotus_transform_password (const u32x in[4], u32x out[4], __local u32 *s_lotus_magic_table)
void lotus_transform_password (const u32 in[4], u32x out[4], __local u32 *s_lotus_magic_table)
{
u32x t = out[3] >> 24;
u32 t = out[3] >> 24;
u32x c;
u32 c;
#ifdef _unroll
#pragma unroll
@ -181,9 +181,9 @@ void pad (u32 w[4], const u32 len)
}
}
void mdtransform_norecalc (u32x state[4], const u32x block[4], __local u32 *s_lotus_magic_table)
void mdtransform_norecalc (u32 state[4], const u32x block[4], __local u32 *s_lotus_magic_table)
{
u32x x[12];
u32 x[12];
x[ 0] = state[0];
x[ 1] = state[1];
@ -206,16 +206,16 @@ void mdtransform_norecalc (u32x state[4], const u32x block[4], __local u32 *s_lo
state[3] = x[3];
}
void mdtransform (u32x state[4], u32x checksum[4], const u32x block[4], __local u32 *s_lotus_magic_table)
void mdtransform (u32 state[4], u32x checksum[4], const u32x block[4], __local u32 *s_lotus_magic_table)
{
mdtransform_norecalc (state, block, s_lotus_magic_table);
lotus_transform_password (block, checksum, s_lotus_magic_table);
}
void domino_big_md (const u32x saved_key[4], const u32 size, u32x state[4], __local u32 *s_lotus_magic_table)
void domino_big_md (const u32 saved_key[4], const u32 size, u32x state[4], __local u32 *s_lotus_magic_table)
{
u32x checksum[4];
u32 checksum[4];
checksum[0] = 0;
checksum[1] = 0;
@ -276,18 +276,18 @@ __kernel void m08600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -298,10 +298,10 @@ __kernel void m08600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -321,7 +321,7 @@ __kernel void m08600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32 w0[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -332,7 +332,7 @@ __kernel void m08600_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
* domino
*/
u32x state[4];
u32 state[4];
state[0] = 0;
state[1] = 0;
@ -412,18 +412,18 @@ __kernel void m08600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos);
const u32x pw_len = pw_l_len + pw_r_len;
const u32 pw_len = pw_l_len + pw_r_len;
/**
* concat password candidate
*/
u32x wordl0[4] = { 0 };
u32x wordl1[4] = { 0 };
u32x wordl2[4] = { 0 };
u32x wordl3[4] = { 0 };
u32 wordl0[4] = { 0 };
u32 wordl1[4] = { 0 };
u32 wordl2[4] = { 0 };
u32 wordl3[4] = { 0 };
wordl0[0] = pw_buf0[0];
wordl0[1] = pw_buf0[1];
@ -434,10 +434,10 @@ __kernel void m08600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
wordl1[2] = pw_buf1[2];
wordl1[3] = pw_buf1[3];
u32x wordr0[4] = { 0 };
u32x wordr1[4] = { 0 };
u32x wordr2[4] = { 0 };
u32x wordr3[4] = { 0 };
u32 wordr0[4] = { 0 };
u32 wordr1[4] = { 0 };
u32 wordr2[4] = { 0 };
u32 wordr3[4] = { 0 };
wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
@ -457,7 +457,7 @@ __kernel void m08600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
}
u32x w0[4];
u32 w0[4];
w0[0] = wordl0[0] | wordr0[0];
w0[1] = wordl0[1] | wordr0[1];
@ -468,7 +468,7 @@ __kernel void m08600_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
* domino
*/
u32x state[4];
u32 state[4];
state[0] = 0;
state[1] = 0;

Loading…
Cancel
Save