Replace some SIMD-related function calls

pull/1335/head
jsteube 7 years ago
parent 9a46442a80
commit 5b5bdf3889

@ -31,8 +31,8 @@ static void append_four_byte (const u32 *buf_src, const int off_src, u32 *buf_ds
t64 >>= sm8;
t64 <<= dm8;
const u32 t0 = l32_from_64 (t64);
const u32 t1 = h32_from_64 (t64);
const u32 t0 = l32_from_64_S (t64);
const u32 t1 = h32_from_64_S (t64);
buf_dst[dd + 0] |= t0;
buf_dst[dd + 1] |= t1;
@ -54,8 +54,8 @@ static void append_three_byte (const u32 *buf_src, const int off_src, u32 *buf_d
t64 &= 0x00ffffff;
t64 <<= dm8;
const u32 t0 = l32_from_64 (t64);
const u32 t1 = h32_from_64 (t64);
const u32 t0 = l32_from_64_S (t64);
const u32 t1 = h32_from_64_S (t64);
buf_dst[dd + 0] |= t0;
buf_dst[dd + 1] |= t1;
@ -77,8 +77,8 @@ static void append_two_byte (const u32 *buf_src, const int off_src, u32 *buf_dst
t64 &= 0x0000ffff;
t64 <<= dm8;
const u32 t0 = l32_from_64 (t64);
const u32 t1 = h32_from_64 (t64);
const u32 t0 = l32_from_64_S (t64);
const u32 t1 = h32_from_64_S (t64);
buf_dst[dd + 0] |= t0;
buf_dst[dd + 1] |= t1;

@ -477,10 +477,10 @@ __kernel void m01800_comp (__global pw_t *pws, __global const kernel_rule_t *rul
const u64 a = swap64 (tmps[gid].l_alt_result[0]);
const u64 b = swap64 (tmps[gid].l_alt_result[1]);
const u32 r0 = l32_from_64 (a);
const u32 r1 = h32_from_64 (a);
const u32 r2 = l32_from_64 (b);
const u32 r3 = h32_from_64 (b);
const u32 r0 = l32_from_64_S (a);
const u32 r1 = h32_from_64_S (a);
const u32 r2 = l32_from_64_S (b);
const u32 r3 = h32_from_64_S (b);
#define il_pos 0

@ -535,14 +535,14 @@ __kernel void m06222_comp (__global pw_t *pws, __global const kernel_rule_t *rul
u32 ukey3[8];
#endif
ukey3[0] = swap32_S (h32_from_64 (tmps[gid].out[ 8]));
ukey3[1] = swap32_S (l32_from_64 (tmps[gid].out[ 8]));
ukey3[2] = swap32_S (h32_from_64 (tmps[gid].out[ 9]));
ukey3[3] = swap32_S (l32_from_64 (tmps[gid].out[ 9]));
ukey3[4] = swap32_S (h32_from_64 (tmps[gid].out[10]));
ukey3[5] = swap32_S (l32_from_64 (tmps[gid].out[10]));
ukey3[6] = swap32_S (h32_from_64 (tmps[gid].out[11]));
ukey3[7] = swap32_S (l32_from_64 (tmps[gid].out[11]));
ukey3[0] = swap32_S (h32_from_64_S (tmps[gid].out[ 8]));
ukey3[1] = swap32_S (l32_from_64_S (tmps[gid].out[ 8]));
ukey3[2] = swap32_S (h32_from_64_S (tmps[gid].out[ 9]));
ukey3[3] = swap32_S (l32_from_64_S (tmps[gid].out[ 9]));
ukey3[4] = swap32_S (h32_from_64_S (tmps[gid].out[10]));
ukey3[5] = swap32_S (l32_from_64_S (tmps[gid].out[10]));
ukey3[6] = swap32_S (h32_from_64_S (tmps[gid].out[11]));
ukey3[7] = swap32_S (l32_from_64_S (tmps[gid].out[11]));
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey4[8];
@ -550,14 +550,14 @@ __kernel void m06222_comp (__global pw_t *pws, __global const kernel_rule_t *rul
u32 ukey4[8];
#endif
ukey4[0] = swap32_S (h32_from_64 (tmps[gid].out[12]));
ukey4[1] = swap32_S (l32_from_64 (tmps[gid].out[12]));
ukey4[2] = swap32_S (h32_from_64 (tmps[gid].out[13]));
ukey4[3] = swap32_S (l32_from_64 (tmps[gid].out[13]));
ukey4[4] = swap32_S (h32_from_64 (tmps[gid].out[14]));
ukey4[5] = swap32_S (l32_from_64 (tmps[gid].out[14]));
ukey4[6] = swap32_S (h32_from_64 (tmps[gid].out[15]));
ukey4[7] = swap32_S (l32_from_64 (tmps[gid].out[15]));
ukey4[0] = swap32_S (h32_from_64_S (tmps[gid].out[12]));
ukey4[1] = swap32_S (l32_from_64_S (tmps[gid].out[12]));
ukey4[2] = swap32_S (h32_from_64_S (tmps[gid].out[13]));
ukey4[3] = swap32_S (l32_from_64_S (tmps[gid].out[13]));
ukey4[4] = swap32_S (h32_from_64_S (tmps[gid].out[14]));
ukey4[5] = swap32_S (l32_from_64_S (tmps[gid].out[14]));
ukey4[6] = swap32_S (h32_from_64_S (tmps[gid].out[15]));
ukey4[7] = swap32_S (l32_from_64_S (tmps[gid].out[15]));
if (verify_header_aes_twofish (esalt_bufs, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
{

@ -535,14 +535,14 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
u32 ukey3[8];
#endif
ukey3[0] = swap32_S (h32_from_64 (tmps[gid].out[ 8]));
ukey3[1] = swap32_S (l32_from_64 (tmps[gid].out[ 8]));
ukey3[2] = swap32_S (h32_from_64 (tmps[gid].out[ 9]));
ukey3[3] = swap32_S (l32_from_64 (tmps[gid].out[ 9]));
ukey3[4] = swap32_S (h32_from_64 (tmps[gid].out[10]));
ukey3[5] = swap32_S (l32_from_64 (tmps[gid].out[10]));
ukey3[6] = swap32_S (h32_from_64 (tmps[gid].out[11]));
ukey3[7] = swap32_S (l32_from_64 (tmps[gid].out[11]));
ukey3[0] = swap32_S (h32_from_64_S (tmps[gid].out[ 8]));
ukey3[1] = swap32_S (l32_from_64_S (tmps[gid].out[ 8]));
ukey3[2] = swap32_S (h32_from_64_S (tmps[gid].out[ 9]));
ukey3[3] = swap32_S (l32_from_64_S (tmps[gid].out[ 9]));
ukey3[4] = swap32_S (h32_from_64_S (tmps[gid].out[10]));
ukey3[5] = swap32_S (l32_from_64_S (tmps[gid].out[10]));
ukey3[6] = swap32_S (h32_from_64_S (tmps[gid].out[11]));
ukey3[7] = swap32_S (l32_from_64_S (tmps[gid].out[11]));
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey4[8];
@ -550,14 +550,14 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
u32 ukey4[8];
#endif
ukey4[0] = swap32_S (h32_from_64 (tmps[gid].out[12]));
ukey4[1] = swap32_S (l32_from_64 (tmps[gid].out[12]));
ukey4[2] = swap32_S (h32_from_64 (tmps[gid].out[13]));
ukey4[3] = swap32_S (l32_from_64 (tmps[gid].out[13]));
ukey4[4] = swap32_S (h32_from_64 (tmps[gid].out[14]));
ukey4[5] = swap32_S (l32_from_64 (tmps[gid].out[14]));
ukey4[6] = swap32_S (h32_from_64 (tmps[gid].out[15]));
ukey4[7] = swap32_S (l32_from_64 (tmps[gid].out[15]));
ukey4[0] = swap32_S (h32_from_64_S (tmps[gid].out[12]));
ukey4[1] = swap32_S (l32_from_64_S (tmps[gid].out[12]));
ukey4[2] = swap32_S (h32_from_64_S (tmps[gid].out[13]));
ukey4[3] = swap32_S (l32_from_64_S (tmps[gid].out[13]));
ukey4[4] = swap32_S (h32_from_64_S (tmps[gid].out[14]));
ukey4[5] = swap32_S (l32_from_64_S (tmps[gid].out[14]));
ukey4[6] = swap32_S (h32_from_64_S (tmps[gid].out[15]));
ukey4[7] = swap32_S (l32_from_64_S (tmps[gid].out[15]));
if (verify_header_aes_twofish (esalt_bufs, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
{
@ -589,14 +589,14 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
volatile u32 ukey5[8];
#endif
ukey5[0] = swap32_S (h32_from_64 (tmps[gid].out[16]));
ukey5[1] = swap32_S (l32_from_64 (tmps[gid].out[16]));
ukey5[2] = swap32_S (h32_from_64 (tmps[gid].out[17]));
ukey5[3] = swap32_S (l32_from_64 (tmps[gid].out[17]));
ukey5[4] = swap32_S (h32_from_64 (tmps[gid].out[18]));
ukey5[5] = swap32_S (l32_from_64 (tmps[gid].out[18]));
ukey5[6] = swap32_S (h32_from_64 (tmps[gid].out[19]));
ukey5[7] = swap32_S (l32_from_64 (tmps[gid].out[19]));
ukey5[0] = swap32_S (h32_from_64_S (tmps[gid].out[16]));
ukey5[1] = swap32_S (l32_from_64_S (tmps[gid].out[16]));
ukey5[2] = swap32_S (h32_from_64_S (tmps[gid].out[17]));
ukey5[3] = swap32_S (l32_from_64_S (tmps[gid].out[17]));
ukey5[4] = swap32_S (h32_from_64_S (tmps[gid].out[18]));
ukey5[5] = swap32_S (l32_from_64_S (tmps[gid].out[18]));
ukey5[6] = swap32_S (h32_from_64_S (tmps[gid].out[19]));
ukey5[7] = swap32_S (l32_from_64_S (tmps[gid].out[19]));
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey6[8];
@ -604,14 +604,14 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
volatile u32 ukey6[8];
#endif
ukey6[0] = swap32_S (h32_from_64 (tmps[gid].out[20]));
ukey6[1] = swap32_S (l32_from_64 (tmps[gid].out[20]));
ukey6[2] = swap32_S (h32_from_64 (tmps[gid].out[21]));
ukey6[3] = swap32_S (l32_from_64 (tmps[gid].out[21]));
ukey6[4] = swap32_S (h32_from_64 (tmps[gid].out[22]));
ukey6[5] = swap32_S (l32_from_64 (tmps[gid].out[22]));
ukey6[6] = swap32_S (h32_from_64 (tmps[gid].out[23]));
ukey6[7] = swap32_S (l32_from_64 (tmps[gid].out[23]));
ukey6[0] = swap32_S (h32_from_64_S (tmps[gid].out[20]));
ukey6[1] = swap32_S (l32_from_64_S (tmps[gid].out[20]));
ukey6[2] = swap32_S (h32_from_64_S (tmps[gid].out[21]));
ukey6[3] = swap32_S (l32_from_64_S (tmps[gid].out[21]));
ukey6[4] = swap32_S (h32_from_64_S (tmps[gid].out[22]));
ukey6[5] = swap32_S (l32_from_64_S (tmps[gid].out[22]));
ukey6[6] = swap32_S (h32_from_64_S (tmps[gid].out[23]));
ukey6[7] = swap32_S (l32_from_64_S (tmps[gid].out[23]));
if (verify_header_aes_twofish_serpent (esalt_bufs, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1)
{

@ -59,10 +59,10 @@ __kernel void m10800_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha384_final (&ctx);
const u32 r0 = l32_from_64 (ctx.h[3]);
const u32 r1 = h32_from_64 (ctx.h[3]);
const u32 r2 = l32_from_64 (ctx.h[2]);
const u32 r3 = h32_from_64 (ctx.h[2]);
const u32 r0 = l32_from_64_S (ctx.h[3]);
const u32 r1 = h32_from_64_S (ctx.h[3]);
const u32 r2 = l32_from_64_S (ctx.h[2]);
const u32 r3 = h32_from_64_S (ctx.h[2]);
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
@ -124,10 +124,10 @@ __kernel void m10800_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha384_final (&ctx);
const u32 r0 = l32_from_64 (ctx.h[3]);
const u32 r1 = h32_from_64 (ctx.h[3]);
const u32 r2 = l32_from_64 (ctx.h[2]);
const u32 r3 = h32_from_64 (ctx.h[2]);
const u32 r0 = l32_from_64_S (ctx.h[3]);
const u32 r1 = h32_from_64_S (ctx.h[3]);
const u32 r2 = l32_from_64_S (ctx.h[2]);
const u32 r3 = h32_from_64_S (ctx.h[2]);
COMPARE_S_SCALAR (r0, r1, r2, r3);
}

@ -46,10 +46,10 @@ __kernel void m10800_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha384_final (&ctx);
const u32 r0 = l32_from_64 (ctx.h[3]);
const u32 r1 = h32_from_64 (ctx.h[3]);
const u32 r2 = l32_from_64 (ctx.h[2]);
const u32 r3 = h32_from_64 (ctx.h[2]);
const u32 r0 = l32_from_64_S (ctx.h[3]);
const u32 r1 = h32_from_64_S (ctx.h[3]);
const u32 r2 = l32_from_64_S (ctx.h[2]);
const u32 r3 = h32_from_64_S (ctx.h[2]);
COMPARE_M_SCALAR (r0, r1, r2, r3);
}
@ -100,10 +100,10 @@ __kernel void m10800_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha384_final (&ctx);
const u32 r0 = l32_from_64 (ctx.h[3]);
const u32 r1 = h32_from_64 (ctx.h[3]);
const u32 r2 = l32_from_64 (ctx.h[2]);
const u32 r3 = h32_from_64 (ctx.h[2]);
const u32 r0 = l32_from_64_S (ctx.h[3]);
const u32 r1 = h32_from_64_S (ctx.h[3]);
const u32 r2 = l32_from_64_S (ctx.h[2]);
const u32 r3 = h32_from_64_S (ctx.h[2]);
COMPARE_S_SCALAR (r0, r1, r2, r3);
}

Loading…
Cancel
Save