switch from clz() to ffz() for bitsliced algorithms

pull/488/head
Jens Steube 8 years ago
parent 71a8459d85
commit a267c61fbb

@ -7,6 +7,18 @@
* pure scalar functions
*/
/**
 * Find the first zero bit of v, scanning upward from bit position 0.
 *
 * Only bit positions 0 through 31 are examined.
 *
 * Returns the index of the lowest clear bit, or -1 when every examined
 * bit is set.
 */
inline int ffz (const u32 v)
{
  int pos = 0;

  while (pos < 32)
  {
    const u32 bit = (v >> pos) & 1;

    // the first clear bit ends the search
    if (bit == 0) return pos;

    pos++;
  }

  // all examined bits were set
  return -1;
}
inline int hash_comp (const u32 d1[4], __global u32 *d2)
{
if (d1[3] > d2[DGST_R3]) return ( 1);

@ -2214,7 +2214,7 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
if (tmpResult == 0xffffffff) continue;
const u32 slice = 31 - clz (~tmpResult);
const u32 slice = ffz (tmpResult);
const u32 r0 = search[0];
const u32 r1 = search[1];
@ -2234,8 +2234,8 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
#endif
for (int i = 0; i < 32; i++)
{
out0[i] = out[ 0 + 31 - i];
out1[i] = out[32 + 31 - i];
out0[i] = out[ 0 + i];
out1[i] = out[32 + i];
}
transpose32c (out0);
@ -2246,8 +2246,8 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
#endif
for (int slice = 0; slice < 32; slice++)
{
const u32 r0 = out0[31 - slice];
const u32 r1 = out1[31 - slice];
const u32 r0 = out0[slice];
const u32 r1 = out1[slice];
const u32 r2 = 0;
const u32 r3 = 0;
@ -2640,7 +2640,7 @@ void m01500s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
if (tmpResult == 0xffffffff) return;
const u32 slice = 31 - clz (~tmpResult);
const u32 slice = ffz (tmpResult);
#include COMPARE_S
}

@ -2056,7 +2056,7 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
if (tmpResult == 0xffffffff) continue;
const u32 slice = 31 - clz (~tmpResult);
const u32 slice = ffz (tmpResult);
const u32 r0 = search[0];
const u32 r1 = search[1];
@ -2076,8 +2076,8 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
#endif
for (int i = 0; i < 32; i++)
{
out0[i] = out[ 0 + 31 - i];
out1[i] = out[32 + 31 - i];
out0[i] = out[ 0 + i];
out1[i] = out[32 + i];
}
transpose32c (out0);
@ -2088,8 +2088,8 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
#endif
for (int slice = 0; slice < 32; slice++)
{
const u32 r0 = out0[31 - slice];
const u32 r1 = out1[31 - slice];
const u32 r0 = out0[slice];
const u32 r1 = out1[slice];
const u32 r2 = 0;
const u32 r3 = 0;
@ -2482,7 +2482,7 @@ void m03000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
if (tmpResult == 0xffffffff) return;
const u32 slice = 31 - clz (~tmpResult);
const u32 slice = ffz (tmpResult);
#include COMPARE_S
}

@ -2194,7 +2194,7 @@ void m14000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
if (tmpResult == 0xffffffff) continue;
const u32 slice = 31 - clz (~tmpResult);
const u32 slice = ffz (tmpResult);
const u32 r0 = search[0];
const u32 r1 = search[1];
@ -2214,8 +2214,8 @@ void m14000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
#endif
for (int i = 0; i < 32; i++)
{
out0[i] = out[ 0 + 31 - i];
out1[i] = out[32 + 31 - i];
out0[i] = out[ 0 + i];
out1[i] = out[32 + i];
}
transpose32c (out0);
@ -2226,8 +2226,8 @@ void m14000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
#endif
for (int slice = 0; slice < 32; slice++)
{
const u32 r0 = out0[31 - slice];
const u32 r1 = out1[31 - slice];
const u32 r0 = out0[slice];
const u32 r1 = out1[slice];
const u32 r2 = 0;
const u32 r3 = 0;
@ -2684,7 +2684,7 @@ void m14000s (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
if (tmpResult == 0xffffffff) return;
const u32 slice = 31 - clz (~tmpResult);
const u32 slice = ffz (tmpResult);
#include COMPARE_S
}

Loading…
Cancel
Save