Optimize kernels for ROCm 1.6

- Remove inline keywords
- Remove volatile keywords where they cause ROCm to slow down
- Replace DES functions (it looks like bitselect is, for some reason, no longer mapped to BFI_INT)
pull/1309/head
jsteube 7 years ago
parent 9ca3158ed8
commit 5e34ec348e

@ -7,7 +7,7 @@
* pure scalar functions
*/
inline int ffz (const u32 v)
int ffz (const u32 v)
{
#ifdef _unroll
#pragma unroll
@ -22,7 +22,7 @@ inline int ffz (const u32 v)
return -1;
}
inline int hash_comp (const u32 d1[4], __global const u32 *d2)
int hash_comp (const u32 d1[4], __global const u32 *d2)
{
if (d1[3] > d2[DGST_R3]) return ( 1);
if (d1[3] < d2[DGST_R3]) return (-1);
@ -36,7 +36,7 @@ inline int hash_comp (const u32 d1[4], __global const u32 *d2)
return (0);
}
inline int find_hash (const u32 digest[4], const u32 digests_cnt, __global const digest_t *digests_buf)
int find_hash (const u32 digest[4], const u32 digests_cnt, __global const digest_t *digests_buf)
{
for (u32 l = 0, r = digests_cnt; r; r >>= 1)
{
@ -59,12 +59,12 @@ inline int find_hash (const u32 digest[4], const u32 digests_cnt, __global const
return (-1);
}
inline u32 check_bitmap (__global const u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest)
/**
 * Look up the bit that corresponds to `digest` in a bitmap of u32 words.
 *
 * The word index is (digest >> bitmap_shift) & bitmap_mask; the bit inside
 * that word is selected by the low five bits of digest.
 *
 * Returns the selected word masked down to that single bit, i.e. a non-zero
 * value when the bit is set and 0 when it is clear.
 *
 * NOTE(review): assumes u32 is a 32-bit unsigned type - confirm against the
 * project's type definitions.
 */
u32 check_bitmap (__global const u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest)
{
  const u32 word_idx = (digest >> bitmap_shift) & bitmap_mask;
  const u32 bit_mask = 1 << (digest & 0x1f);

  return (bitmap[word_idx] & bit_mask);
}
inline u32 check (const u32 digest[4], __global const u32 *bitmap_s1_a, __global const u32 *bitmap_s1_b, __global const u32 *bitmap_s1_c, __global const u32 *bitmap_s1_d, __global const u32 *bitmap_s2_a, __global const u32 *bitmap_s2_b, __global const u32 *bitmap_s2_c, __global const u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2)
u32 check (const u32 digest[4], __global const u32 *bitmap_s1_a, __global const u32 *bitmap_s1_b, __global const u32 *bitmap_s1_c, __global const u32 *bitmap_s1_d, __global const u32 *bitmap_s2_a, __global const u32 *bitmap_s2_b, __global const u32 *bitmap_s2_c, __global const u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2)
{
if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0);
if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0);
@ -79,7 +79,7 @@ inline u32 check (const u32 digest[4], __global const u32 *bitmap_s1_a, __global
return (1);
}
inline void mark_hash (__global plain_t *plains_buf, __global u32 *d_result, const u32 salt_pos, const u32 digests_cnt, const u32 digest_pos, const u32 hash_pos, const u32 gid, const u32 il_pos)
void mark_hash (__global plain_t *plains_buf, __global u32 *d_result, const u32 salt_pos, const u32 digests_cnt, const u32 digest_pos, const u32 hash_pos, const u32 gid, const u32 il_pos)
{
const u32 idx = atomic_inc (d_result);
@ -100,7 +100,7 @@ inline void mark_hash (__global plain_t *plains_buf, __global u32 *d_result, con
plains_buf[idx].il_pos = il_pos;
}
inline int count_char (const u32 *buf, const int elems, const u32 c)
int count_char (const u32 *buf, const int elems, const u32 c)
{
int r = 0;
@ -117,7 +117,7 @@ inline int count_char (const u32 *buf, const int elems, const u32 c)
return r;
}
inline float get_entropy (const u32 *buf, const int elems)
float get_entropy (const u32 *buf, const int elems)
{
const int length = elems * 4;
@ -144,7 +144,7 @@ inline float get_entropy (const u32 *buf, const int elems)
* vector functions
*/
inline void truncate_block_4x4_le (u32x w0[4], const u32 len)
void truncate_block_4x4_le (u32x w0[4], const u32 len)
{
switch (len)
{
@ -254,7 +254,7 @@ inline void truncate_block_4x4_le (u32x w0[4], const u32 len)
}
}
inline void truncate_block_16x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 len)
void truncate_block_16x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 len)
{
switch (len)
{
@ -1060,7 +1060,7 @@ inline void truncate_block_16x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[
}
}
inline void truncate_block_4x4_be (u32x w0[4], const u32 len)
void truncate_block_4x4_be (u32x w0[4], const u32 len)
{
switch (len)
{
@ -1170,7 +1170,7 @@ inline void truncate_block_4x4_be (u32x w0[4], const u32 len)
}
}
inline void truncate_block_16x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 len)
void truncate_block_16x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 len)
{
switch (len)
{
@ -1976,7 +1976,7 @@ inline void truncate_block_16x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[
}
}
inline void make_utf16be (const u32x in[4], u32x out1[4], u32x out2[4])
void make_utf16be (const u32x in[4], u32x out1[4], u32x out2[4])
{
#ifdef IS_NV
out2[3] = __byte_perm (in[3], 0, 0x3727);
@ -2001,7 +2001,7 @@ inline void make_utf16be (const u32x in[4], u32x out1[4], u32x out2[4])
#endif
}
inline void make_utf16beN (const u32x in[4], u32x out1[4], u32x out2[4])
void make_utf16beN (const u32x in[4], u32x out1[4], u32x out2[4])
{
#ifdef IS_NV
out2[3] = __byte_perm (in[3], 0, 0x1707);
@ -2026,7 +2026,7 @@ inline void make_utf16beN (const u32x in[4], u32x out1[4], u32x out2[4])
#endif
}
inline void make_utf16le (const u32x in[4], u32x out1[4], u32x out2[4])
void make_utf16le (const u32x in[4], u32x out1[4], u32x out2[4])
{
#ifdef IS_NV
out2[3] = __byte_perm (in[3], 0, 0x7372);
@ -2051,7 +2051,7 @@ inline void make_utf16le (const u32x in[4], u32x out1[4], u32x out2[4])
#endif
}
inline void undo_utf16be (const u32x in1[4], const u32x in2[4], u32x out[4])
void undo_utf16be (const u32x in1[4], const u32x in2[4], u32x out[4])
{
#ifdef IS_NV
out[0] = __byte_perm (in1[0], in1[1], 0x4602);
@ -2072,7 +2072,7 @@ inline void undo_utf16be (const u32x in1[4], const u32x in2[4], u32x out[4])
#endif
}
inline void undo_utf16le (const u32x in1[4], const u32x in2[4], u32x out[4])
void undo_utf16le (const u32x in1[4], const u32x in2[4], u32x out[4])
{
#ifdef IS_NV
out[0] = __byte_perm (in1[0], in1[1], 0x6420);
@ -2093,7 +2093,7 @@ inline void undo_utf16le (const u32x in1[4], const u32x in2[4], u32x out[4])
#endif
}
inline void append_0x80_1x4 (u32x w0[4], const u32 offset)
void append_0x80_1x4 (u32x w0[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -2103,7 +2103,7 @@ inline void append_0x80_1x4 (u32x w0[4], const u32 offset)
w0[3] |= (offset >= 12) ? tmp : 0;
}
inline void append_0x80_2x4 (u32x w0[4], u32x w1[4], const u32 offset)
void append_0x80_2x4 (u32x w0[4], u32x w1[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -2117,7 +2117,7 @@ inline void append_0x80_2x4 (u32x w0[4], u32x w1[4], const u32 offset)
w1[3] |= (offset >= 28) ? tmp : 0;
}
inline void append_0x80_3x4 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset)
void append_0x80_3x4 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -2135,7 +2135,7 @@ inline void append_0x80_3x4 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offse
w2[3] |= (offset >= 44) ? tmp : 0;
}
inline void append_0x80_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
void append_0x80_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -2157,7 +2157,7 @@ inline void append_0x80_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], con
w3[3] |= (offset >= 60) ? tmp : 0;
}
inline void append_0x80_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
void append_0x80_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
{
switch (offset)
{
@ -2675,7 +2675,7 @@ inline void append_0x80_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32
}
}
inline void append_0x80_1x16 (u32x w[16], const u32 offset)
void append_0x80_1x16 (u32x w[16], const u32 offset)
{
switch (offset)
{
@ -2937,7 +2937,7 @@ inline void append_0x80_1x16 (u32x w[16], const u32 offset)
}
}
inline void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
@ -3798,7 +3798,7 @@ inline void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
#endif
}
inline void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
{
const int offset_mod_4 = offset & 3;
@ -4600,7 +4600,7 @@ inline void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
}
}
inline void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -5255,7 +5255,7 @@ inline void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
#endif
}
inline void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -6182,7 +6182,7 @@ inline void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4]
#endif
}
inline void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
@ -7795,7 +7795,7 @@ inline void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
#endif
}
inline void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -10114,7 +10114,7 @@ inline void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4],
#endif
}
inline void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], u32x c4[4], u32x c5[4], u32x c6[4], u32x c7[4], const u32 offset)
void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], u32x c4[4], u32x c5[4], u32x c6[4], u32x c7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -13489,7 +13489,7 @@ inline void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w
#endif
}
inline void overwrite_at_le (u32x sw[16], const u32x w0, const u32 salt_len)
void overwrite_at_le (u32x sw[16], const u32x w0, const u32 salt_len)
{
#if defined cl_amd_media_ops
switch (salt_len)
@ -13678,7 +13678,7 @@ inline void overwrite_at_le (u32x sw[16], const u32x w0, const u32 salt_len)
#endif
}
inline void overwrite_at_be (u32x sw[16], const u32x w0, const u32 salt_len)
void overwrite_at_be (u32x sw[16], const u32x w0, const u32 salt_len)
{
// would be nice to have optimization based on amd_bytealign as with _le counterpart
@ -13775,7 +13775,7 @@ inline void overwrite_at_be (u32x sw[16], const u32x w0, const u32 salt_len)
}
}
inline void overwrite_at_le_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x wx, const u32 salt_len)
void overwrite_at_le_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x wx, const u32 salt_len)
{
#if defined cl_amd_media_ops
switch (salt_len)
@ -14140,7 +14140,7 @@ inline void overwrite_at_le_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4],
#endif
}
inline void overwrite_at_be_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x wx, const u32 salt_len)
void overwrite_at_be_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x wx, const u32 salt_len)
{
// would be nice to have optimization based on amd_bytealign as with _le counterpart
@ -14329,7 +14329,7 @@ inline void overwrite_at_be_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4],
* vector functions as scalar (for outer loop usage)
*/
inline void append_0x01_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
void append_0x01_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
{
const u32 tmp = 0x01 << ((offset & 3) * 8);
@ -14343,7 +14343,7 @@ inline void append_0x01_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
w1[3] |= (offset >= 28) ? tmp : 0;
}
inline void append_0x80_1x4_S (u32 w0[4], const u32 offset)
void append_0x80_1x4_S (u32 w0[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -14353,7 +14353,7 @@ inline void append_0x80_1x4_S (u32 w0[4], const u32 offset)
w0[3] |= (offset >= 12) ? tmp : 0;
}
inline void append_0x80_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
void append_0x80_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -14367,7 +14367,7 @@ inline void append_0x80_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
w1[3] |= (offset >= 28) ? tmp : 0;
}
inline void append_0x80_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
void append_0x80_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -14385,7 +14385,7 @@ inline void append_0x80_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset
w2[3] |= (offset >= 44) ? tmp : 0;
}
inline void append_0x80_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
void append_0x80_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
const u32 tmp = 0x80 << ((offset & 3) * 8);
@ -14407,7 +14407,7 @@ inline void append_0x80_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const
w3[3] |= (offset >= 60) ? tmp : 0;
}
inline void append_0x80_8x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
void append_0x80_8x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
switch (offset)
{
@ -14925,7 +14925,7 @@ inline void append_0x80_8x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w
}
}
inline void make_utf16be_S (const u32 in[4], u32 out1[4], u32 out2[4])
void make_utf16be_S (const u32 in[4], u32 out1[4], u32 out2[4])
{
#ifdef IS_NV
out2[3] = __byte_perm_S (in[3], 0, 0x3727);
@ -14950,7 +14950,7 @@ inline void make_utf16be_S (const u32 in[4], u32 out1[4], u32 out2[4])
#endif
}
inline void make_utf16beN_S (const u32 in[4], u32 out1[4], u32 out2[4])
void make_utf16beN_S (const u32 in[4], u32 out1[4], u32 out2[4])
{
#ifdef IS_NV
out2[3] = __byte_perm_S (in[3], 0, 0x1707);
@ -14975,7 +14975,7 @@ inline void make_utf16beN_S (const u32 in[4], u32 out1[4], u32 out2[4])
#endif
}
inline void make_utf16le_S (const u32 in[4], u32 out1[4], u32 out2[4])
void make_utf16le_S (const u32 in[4], u32 out1[4], u32 out2[4])
{
#ifdef IS_NV
out2[3] = __byte_perm_S (in[3], 0, 0x7372);
@ -15000,7 +15000,7 @@ inline void make_utf16le_S (const u32 in[4], u32 out1[4], u32 out2[4])
#endif
}
inline void undo_utf16be_S (const u32 in1[4], const u32 in2[4], u32 out[4])
void undo_utf16be_S (const u32 in1[4], const u32 in2[4], u32 out[4])
{
#ifdef IS_NV
out[0] = __byte_perm_S (in1[0], in1[1], 0x4602);
@ -15021,7 +15021,7 @@ inline void undo_utf16be_S (const u32 in1[4], const u32 in2[4], u32 out[4])
#endif
}
inline void undo_utf16le_S (const u32 in1[4], const u32 in2[4], u32 out[4])
void undo_utf16le_S (const u32 in1[4], const u32 in2[4], u32 out[4])
{
#ifdef IS_NV
out[0] = __byte_perm_S (in1[0], in1[1], 0x6420);
@ -15042,7 +15042,7 @@ inline void undo_utf16le_S (const u32 in1[4], const u32 in2[4], u32 out[4])
#endif
}
inline void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
@ -15903,7 +15903,7 @@ inline void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
#endif
}
inline void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
{
const int offset_mod_4 = offset & 3;
@ -16705,7 +16705,7 @@ inline void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
}
}
inline void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -17360,7 +17360,7 @@ inline void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
#endif
}
inline void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -18287,7 +18287,7 @@ inline void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4],
#endif
}
inline void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
@ -19900,7 +19900,7 @@ inline void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
#endif
}
inline void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -22219,7 +22219,7 @@ inline void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u
#endif
}
inline void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], u32 c4[4], u32 c5[4], u32 c6[4], u32 c7[4], const u32 offset)
void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], u32 c4[4], u32 c5[4], u32 c6[4], u32 c7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -25594,7 +25594,7 @@ inline void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2
#endif
}
inline void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
@ -36655,7 +36655,7 @@ inline void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
#endif
}
inline void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
@ -45438,7 +45438,7 @@ inline void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
PACKSV4 (s6, v6, e); \
PACKSV4 (s7, v7, e);
inline void switch_buffer_by_offset_le_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x offset)
void switch_buffer_by_offset_le_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x offset)
{
#if VECT_SIZE == 1
@ -45498,7 +45498,7 @@ inline void switch_buffer_by_offset_le_VV (u32x w0[4], u32x w1[4], u32x w2[4], u
#endif
}
inline void switch_buffer_by_offset_8x4_le_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32x offset)
void switch_buffer_by_offset_8x4_le_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32x offset)
{
#if VECT_SIZE == 1
@ -45678,7 +45678,7 @@ inline void switch_buffer_by_offset_8x4_le_VV (u32x w0[4], u32x w1[4], u32x w2[4
#endif
}
inline void append_0x01_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
void append_0x01_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
{
#if VECT_SIZE == 1
@ -45736,7 +45736,7 @@ inline void append_0x01_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
#endif
}
inline void append_0x80_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
void append_0x80_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
{
#if VECT_SIZE == 1
@ -45794,7 +45794,7 @@ inline void append_0x80_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
#endif
}
inline void append_0x80_4x4_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x offset)
void append_0x80_4x4_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x offset)
{
#if VECT_SIZE == 1

@ -111,7 +111,7 @@ void md4_init (md4_ctx_t *ctx)
void md4_update_64 (md4_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif
@ -1234,7 +1234,7 @@ void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx0)
void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif

@ -145,7 +145,7 @@ void md5_init (md5_ctx_t *ctx)
void md5_update_64 (md5_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif
@ -1303,7 +1303,7 @@ void md5_init_vector_from_scalar (md5_ctx_vector_t *ctx, md5_ctx_t *ctx0)
void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif

@ -245,7 +245,7 @@ void ripemd160_init (ripemd160_ctx_t *ctx)
void ripemd160_update_64 (ripemd160_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif
@ -1504,7 +1504,7 @@ void ripemd160_init_vector_from_scalar (ripemd160_ctx_vector_t *ctx, ripemd160_c
void ripemd160_update_vector_64 (ripemd160_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif

@ -177,7 +177,7 @@ void sha1_init (sha1_ctx_t *ctx)
void sha1_update_64 (sha1_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif
@ -1368,7 +1368,7 @@ void sha1_init_vector_from_scalar (sha1_ctx_vector_t *ctx, sha1_ctx_t *ctx0)
void sha1_update_vector_64 (sha1_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif

@ -162,7 +162,7 @@ void sha224_init (sha224_ctx_t *ctx)
void sha224_update_64 (sha224_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif
@ -1321,7 +1321,7 @@ void sha224_init_vector_from_scalar (sha224_ctx_vector_t *ctx, sha224_ctx_t *ctx
void sha224_update_vector_64 (sha224_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif

@ -162,7 +162,7 @@ void sha256_init (sha256_ctx_t *ctx)
void sha256_update_64 (sha256_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif
@ -1321,7 +1321,7 @@ void sha256_init_vector_from_scalar (sha256_ctx_vector_t *ctx, sha256_ctx_t *ctx
void sha256_update_vector_64 (sha256_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif

@ -186,7 +186,7 @@ void sha384_init (sha384_ctx_t *ctx)
void sha384_update_128 (sha384_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 127;
const int pos = ctx->len & 127;
#else
const int pos = ctx->len & 127;
#endif
@ -2017,7 +2017,7 @@ void sha384_init_vector_from_scalar (sha384_ctx_vector_t *ctx, sha384_ctx_t *ctx
void sha384_update_vector_128 (sha384_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 127;
const int pos = ctx->len & 127;
#else
const int pos = ctx->len & 127;
#endif

@ -186,7 +186,7 @@ void sha512_init (sha512_ctx_t *ctx)
void sha512_update_128 (sha512_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 127;
const int pos = ctx->len & 127;
#else
const int pos = ctx->len & 127;
#endif
@ -2017,7 +2017,7 @@ void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_ctx_t *ctx
void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 127;
const int pos = ctx->len & 127;
#else
const int pos = ctx->len & 127;
#endif

@ -1345,7 +1345,7 @@ void whirlpool_init (whirlpool_ctx_t *ctx, __local u32 (*s_Ch)[256], __local u32
void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif
@ -2608,7 +2608,7 @@ void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, whirlpool_c
void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
{
#ifdef IS_AMD
volatile const int pos = ctx->len & 63;
const int pos = ctx->len & 63;
#else
const int pos = ctx->len & 63;
#endif

@ -3,7 +3,7 @@
* License.....: MIT
*/
inline u32 generate_cmask (const u32 value)
u32 generate_cmask (const u32 value)
{
const u32 rmask = ((value & 0x40404040u) >> 1u)
& ~((value & 0x80808080u) >> 2u);
@ -14,7 +14,7 @@ inline u32 generate_cmask (const u32 value)
return rmask & ~hmask & lmask;
}
inline void truncate_right (u32 buf0[4], u32 buf1[4], const u32 offset)
void truncate_right (u32 buf0[4], u32 buf1[4], const u32 offset)
{
const u32 tmp = (1u << ((offset & 3u) * 8u)) - 1u;
@ -67,7 +67,7 @@ inline void truncate_right (u32 buf0[4], u32 buf1[4], const u32 offset)
}
}
inline void truncate_left (u32 buf0[4], u32 buf1[4], const u32 offset)
void truncate_left (u32 buf0[4], u32 buf1[4], const u32 offset)
{
const u32 tmp = ~((1u << ((offset & 3u) * 8u)) - 1u);
@ -120,7 +120,7 @@ inline void truncate_left (u32 buf0[4], u32 buf1[4], const u32 offset)
}
}
inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
{
out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
@ -132,7 +132,7 @@ inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 o
out1[3] = amd_bytealign_S ( 0, in1[3], 1);
}
inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4])
{
out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
@ -144,7 +144,7 @@ inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 o
out0[0] = amd_bytealign_S (in0[0], 0, 3);
}
inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
{
switch (num)
{
@ -439,7 +439,7 @@ inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32
}
}
inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num)
{
switch (num)
{
@ -734,7 +734,7 @@ inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32
}
}
inline void append_block1 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_r0)
void append_block1 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_r0)
{
// this version works with 1 byte append only
@ -754,7 +754,7 @@ inline void append_block1 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
buf1[3] |= (offset >= 28) ? tmp : 0;
}
inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
@ -1012,7 +1012,7 @@ inline void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32
#endif
}
inline void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len)
{
rshift_block_N (in0, in1, out0, out1, 32 - len);
@ -1038,7 +1038,7 @@ inline void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], con
out1[3] = swap32_S (tib41[3]);
}
inline u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
buf0[0] |= (generate_cmask (buf0[0]));
buf0[1] |= (generate_cmask (buf0[1]));
@ -1052,7 +1052,7 @@ inline u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 bu
return in_len;
}
inline u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
buf0[0] &= ~(generate_cmask (buf0[0]));
buf0[1] &= ~(generate_cmask (buf0[1]));
@ -1066,7 +1066,7 @@ inline u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 bu
return in_len;
}
inline u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len);
@ -1075,7 +1075,7 @@ inline u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
rule_op_mangle_urest (p0, p1, buf0, buf1, in_len);
@ -1084,7 +1084,7 @@ inline u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
buf0[0] ^= (generate_cmask (buf0[0]));
buf0[1] ^= (generate_cmask (buf0[1]));
@ -1098,7 +1098,7 @@ inline u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 bu
return in_len;
}
inline u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -1119,14 +1119,14 @@ inline u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u3
return in_len;
}
// Rule handler that delegates to reverse_block(), passing buf0/buf1 as both
// the input and the output arrays so the operation happens in place.
// p0 and p1 are not used; the length is returned unchanged.
inline u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
reverse_block (buf0, buf1, buf0, buf1, in_len);
return in_len;
}
inline u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ((in_len + in_len) >= 32) return (in_len);
@ -1139,7 +1139,7 @@ inline u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32
return out_len;
}
inline u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (((in_len * p0) + in_len) >= 32) return (in_len);
@ -1167,7 +1167,7 @@ inline u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4
return out_len;
}
inline u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ((in_len + in_len) >= 32) return (in_len);
@ -1185,7 +1185,7 @@ inline u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32
return out_len;
}
inline u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ((in_len + 1) >= 32) return (in_len);
@ -1198,7 +1198,7 @@ inline u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 b
return out_len;
}
inline u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ((in_len + 1) >= 32) return (in_len);
@ -1213,7 +1213,7 @@ inline u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32
return out_len;
}
inline u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len == 0) return (in_len);
@ -1237,7 +1237,7 @@ inline u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len == 0) return (in_len);
@ -1267,7 +1267,7 @@ inline u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len == 0) return (in_len);
@ -1278,7 +1278,7 @@ inline u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4],
return in_len1;
}
inline u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len == 0) return (in_len);
@ -1298,7 +1298,7 @@ inline u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4],
return in_len1;
}
inline u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -1373,7 +1373,7 @@ inline u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u3
return out_len;
}
inline u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -1388,7 +1388,7 @@ inline u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32
return out_len;
}
inline u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -1474,7 +1474,7 @@ inline u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf
return out_len;
}
inline u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 > in_len) return (in_len);
@ -1546,7 +1546,7 @@ inline u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 b
return out_len;
}
inline u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -1569,7 +1569,7 @@ inline u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u
return in_len;
}
inline u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -1578,7 +1578,7 @@ inline u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4],
return p0;
}
inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
const uchar4 tmp0 = (uchar4) (p0);
const uchar4 tmp1 = (uchar4) (p1);
@ -1597,7 +1597,7 @@ inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32
return in_len;
}
inline u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
u32 out_len = 0;
@ -1638,13 +1638,13 @@ inline u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u3
return out_len;
}
// Placeholder rule handler: not implemented yet (see TODO). It leaves
// buf0/buf1 untouched, ignores p0/p1 and returns the input length as-is.
inline u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
// TODO
return in_len;
}
inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ( in_len == 0) return (in_len);
if ((in_len + p0) >= 32) return (in_len);
@ -1831,7 +1831,7 @@ inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4
return out_len;
}
inline u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ( in_len == 0) return (in_len);
if ((in_len + p0) >= 32) return (in_len);
@ -1865,7 +1865,7 @@ inline u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4]
return out_len;
}
inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ( in_len == 0) return (in_len);
if ((in_len + in_len) >= 32) return (in_len);
@ -1898,7 +1898,7 @@ inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4],
return out_len;
}
inline u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len < 2) return (in_len);
@ -1907,7 +1907,7 @@ inline u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (in_len < 2) return (in_len);
@ -1992,7 +1992,7 @@ inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
if (p1 >= in_len) return (in_len);
@ -2239,7 +2239,7 @@ inline u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u3
return in_len;
}
inline u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -2261,7 +2261,7 @@ inline u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u
return in_len;
}
inline u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -2283,7 +2283,7 @@ inline u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u
return in_len;
}
inline u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -2307,7 +2307,7 @@ inline u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32
return in_len;
}
inline u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 >= in_len) return (in_len);
@ -2331,7 +2331,7 @@ inline u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32
return in_len;
}
inline u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if ((p0 + 1) >= in_len) return (in_len);
@ -2358,7 +2358,7 @@ inline u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 == 0) return (in_len);
@ -2387,7 +2387,7 @@ inline u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32 buf0[4],
return in_len;
}
inline u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 > in_len) return (in_len);
@ -2425,7 +2425,7 @@ inline u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[
return out_len;
}
inline u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
if (p0 > in_len) return (in_len);
@ -2454,7 +2454,7 @@ inline u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4
return out_len;
}
inline u32 rule_op_mangle_title_sep (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 rule_op_mangle_title_sep (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
buf0[0] |= (generate_cmask (buf0[0]));
buf0[1] |= (generate_cmask (buf0[1]));
@ -2497,7 +2497,7 @@ inline u32 rule_op_mangle_title_sep (const u32 p0, const u32 p1, u32 buf0[4], u3
return in_len;
}
inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len)
{
u32 out_len = in_len;
@ -2549,7 +2549,7 @@ inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4],
return out_len;
}
inline u32 apply_rules (__global const u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
u32 apply_rules (__global const u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len)
{
u32 out_len = len;
@ -2567,7 +2567,7 @@ inline u32 apply_rules (__global const u32 *cmds, u32 buf0[4], u32 buf1[4], cons
return out_len;
}
inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, __global const kernel_rule_t *rules_buf, const u32 il_pos, u32x buf0[4], u32x buf1[4])
u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, __global const kernel_rule_t *rules_buf, const u32 il_pos, u32x buf0[4], u32x buf1[4])
{
#if VECT_SIZE == 1

@ -1054,7 +1054,7 @@
// attack-mode 0
inline u32x ix_create_bft (__global const bf_t *bfs_buf, const u32 il_pos)
u32x ix_create_bft (__global const bf_t *bfs_buf, const u32 il_pos)
{
#if VECT_SIZE == 1
const u32x ix = (u32x) (bfs_buf[il_pos + 0].i);
@ -1073,7 +1073,7 @@ inline u32x ix_create_bft (__global const bf_t *bfs_buf, const u32 il_pos)
// attack-mode 1
inline u32x pwlenx_create_combt (__global const pw_t *combs_buf, const u32 il_pos)
u32x pwlenx_create_combt (__global const pw_t *combs_buf, const u32 il_pos)
{
#if VECT_SIZE == 1
const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len);
@ -1090,7 +1090,7 @@ inline u32x pwlenx_create_combt (__global const pw_t *combs_buf, const u32 il_po
return pw_lenx;
}
inline u32x ix_create_combt (__global const pw_t *combs_buf, const u32 il_pos, const int idx)
u32x ix_create_combt (__global const pw_t *combs_buf, const u32 il_pos, const int idx)
{
#if VECT_SIZE == 1
const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx]);

@ -33,14 +33,14 @@ typedef VTYPE(uint, VECT_SIZE) u32x;
typedef VTYPE(ulong, VECT_SIZE) u64x;
#endif
// Returns a converted to u32 by a plain cast, i.e. the low half of the
// 64-bit input (assuming u32/u64 are the 32-/64-bit unsigned types their
// names suggest; the typedefs are outside this view).
inline u32 l32_from_64_S (u64 a)
u32 l32_from_64_S (u64 a)
{
const u32 r = (u32) (a);
return r;
}
inline u32 h32_from_64_S (u64 a)
u32 h32_from_64_S (u64 a)
{
a >>= 32;
@ -49,12 +49,12 @@ inline u32 h32_from_64_S (u64 a)
return r;
}
// Packs two 32-bit values into one 64-bit value by building a uint2 with
// b as component 0 and a as component 1 and reinterpreting its bits as ulong.
// NOTE(review): per the name, a is meant to be the high half and b the low
// half; that relies on the device's little-endian layout — confirm.
inline u64 hl32_to_64_S (const u32 a, const u32 b)
u64 hl32_to_64_S (const u32 a, const u32 b)
{
return as_ulong ((uint2) (b, a));
}
inline u32x l32_from_64 (u64x a)
u32x l32_from_64 (u64x a)
{
u32x r;
@ -93,7 +93,7 @@ inline u32x l32_from_64 (u64x a)
return r;
}
inline u32x h32_from_64 (u64x a)
u32x h32_from_64 (u64x a)
{
a >>= 32;
@ -134,7 +134,7 @@ inline u32x h32_from_64 (u64x a)
return r;
}
inline u64x hl32_to_64 (const u32x a, const u32x b)
u64x hl32_to_64 (const u32x a, const u32x b)
{
u64x r;
@ -174,45 +174,37 @@ inline u64x hl32_to_64 (const u32x a, const u32x b)
}
#ifdef IS_AMD
// IS_AMD build: byte-swaps a 32-bit scalar. v is reinterpreted as four
// bytes, the .s3210 swizzle reverses their order, and the result is
// reinterpreted back as a uint.
inline u32 swap32_S (const u32 v)
u32 swap32_S (const u32 v)
{
return (as_uint (as_uchar4 (v).s3210));
}
// IS_AMD build: byte-swaps a 64-bit scalar. v is reinterpreted as eight
// bytes, the .s76543210 swizzle reverses their order, and the result is
// reinterpreted back as a ulong.
inline u64 swap64_S (const u64 v)
u64 swap64_S (const u64 v)
{
return (as_ulong (as_uchar8 (v).s76543210));
}
// IS_AMD build: rotates the 32-bit scalar a right by n bits.
// The OpenCL rotate() built-in rotates left, so a right rotate by n is
// expressed as a left rotate by 32 - n.
inline u32 rotr32_S (const u32 a, const u32 n)
u32 rotr32_S (const u32 a, const u32 n)
{
return rotate (a, 32 - n);
}
// IS_AMD build: rotates the 32-bit scalar a left by n bits using the
// OpenCL rotate() built-in directly.
inline u32 rotl32_S (const u32 a, const u32 n)
u32 rotl32_S (const u32 a, const u32 n)
{
return rotate (a, n);
}
// IS_AMD build: rotates the 64-bit scalar a right by n bits.
// NOTE(review): this listing appears to interleave the removed
// amd_bitalign-based body (ending at `return r;`) with the added rotate()
// line — confirm against the real file which of the two is live.
inline u64 rotr64_S (const u64 a, const u32 n)
u64 rotr64_S (const u64 a, const u32 n)
{
// split the input into its high (a0) and low (a1) 32-bit halves
const u32 a0 = h32_from_64_S (a);
const u32 a1 = l32_from_64_S (a);
// for n >= 32 the operand order is swapped and the shift reduced by 32
const u32 t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
const u32 t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
const u64 r = hl32_to_64_S (t0, t1);
return r;
// rotate() rotates left, so 64 - n yields a right rotate by n
return rotate (a, (u64) (64 - n));
}
// IS_AMD build: rotates the 64-bit scalar a left by n bits.
// NOTE(review): two return statements are shown; this looks like the removed
// line (left rotate via rotr64_S with 64 - n) followed by the added line
// (direct rotate() call) — confirm against the real file.
inline u64 rotl64_S (const u64 a, const u32 n)
u64 rotl64_S (const u64 a, const u32 n)
{
return rotr64_S (a, 64 - n);
return rotate (a, (u64) n);
}
inline u32x swap32 (const u32x v)
u32x swap32 (const u32x v)
{
return ((v >> 24) & 0x000000ff)
| ((v >> 8) & 0x0000ff00)
@ -220,7 +212,7 @@ inline u32x swap32 (const u32x v)
| ((v << 24) & 0xff000000);
}
inline u64x swap64 (const u64x v)
u64x swap64 (const u64x v)
{
return ((v >> 56) & 0x00000000000000ff)
| ((v >> 40) & 0x000000000000ff00)
@ -232,82 +224,74 @@ inline u64x swap64 (const u64x v)
| ((v << 56) & 0xff00000000000000);
}
// IS_AMD build: rotates the u32x value a right by n bits.
// The OpenCL rotate() built-in rotates left, so a right rotate by n is
// expressed as a left rotate by 32 - n.
inline u32x rotr32 (const u32x a, const u32 n)
u32x rotr32 (const u32x a, const u32 n)
{
return rotate (a, 32 - n);
}
// IS_AMD build: rotates the u32x value a left by n bits using the OpenCL
// rotate() built-in directly.
inline u32x rotl32 (const u32x a, const u32 n)
u32x rotl32 (const u32x a, const u32 n)
{
return rotate (a, n);
}
// IS_AMD build: rotates the u64x value a right by n bits.
// NOTE(review): this listing appears to interleave the removed
// amd_bitalign-based body (ending at `return r;`) with the added rotate()
// line — confirm against the real file which of the two is live.
inline u64x rotr64 (const u64x a, const u32 n)
u64x rotr64 (const u64x a, const u32 n)
{
// split the input into its high (a0) and low (a1) 32-bit halves
const u32x a0 = h32_from_64 (a);
const u32x a1 = l32_from_64 (a);
// for n >= 32 the operand order is swapped and the shift reduced by 32
const u32x t0 = (n >= 32) ? amd_bitalign (a0, a1, n - 32) : amd_bitalign (a1, a0, n);
const u32x t1 = (n >= 32) ? amd_bitalign (a1, a0, n - 32) : amd_bitalign (a0, a1, n);
const u64x r = hl32_to_64 (t0, t1);
return r;
// rotate() rotates left, so 64 - n yields a right rotate by n
return rotate (a, (u64x) (64 - n));
}
// IS_AMD build: rotates the u64x value a left by n bits.
// NOTE(review): two return statements are shown; this looks like the removed
// line (left rotate via rotr64 with 64 - n) followed by the added line
// (direct rotate() call) — confirm against the real file.
inline u64x rotl64 (const u64x a, const u32 n)
u64x rotl64 (const u64x a, const u32 n)
{
return rotr64 (a, 64 - n);
return rotate (a, (u64x) n);
}
// IS_AMD build: thin wrapper that forwards its three arguments unchanged to
// amd_bfe() and returns the result.
// NOTE(review): presumably a = source, b = bit offset, c = bit width of the
// extracted field — verify against the amd_bfe documentation.
inline u32x __bfe (const u32x a, const u32x b, const u32x c)
u32x __bfe (const u32x a, const u32x b, const u32x c)
{
return amd_bfe (a, b, c);
}
// IS_AMD build: scalar counterpart of __bfe; forwards its three arguments
// unchanged to amd_bfe() and returns the result.
// NOTE(review): presumably a = source, b = bit offset, c = bit width of the
// extracted field — verify against the amd_bfe documentation.
inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
u32 __bfe_S (const u32 a, const u32 b, const u32 c)
{
return amd_bfe (a, b, c);
}
// IS_AMD build: scalar-named alias that forwards its three arguments
// unchanged to amd_bytealign() and returns the result.
inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
return amd_bytealign (a, b, c);
}
#endif
#ifdef IS_NV
// IS_NV build: byte-swaps a 32-bit scalar. v is reinterpreted as four bytes,
// the .s3210 swizzle reverses their order, and the result is reinterpreted
// back as a uint.
inline u32 swap32_S (const u32 v)
u32 swap32_S (const u32 v)
{
return (as_uint (as_uchar4 (v).s3210));
}
// IS_NV build: byte-swaps a 64-bit scalar. v is reinterpreted as eight
// bytes, the .s76543210 swizzle reverses their order, and the result is
// reinterpreted back as a ulong.
inline u64 swap64_S (const u64 v)
u64 swap64_S (const u64 v)
{
return (as_ulong (as_uchar8 (v).s76543210));
}
// IS_NV build: rotates the 32-bit scalar a right by n bits.
// The OpenCL rotate() built-in rotates left, so a right rotate by n is
// expressed as a left rotate by 32 - n.
inline u32 rotr32_S (const u32 a, const u32 n)
u32 rotr32_S (const u32 a, const u32 n)
{
return rotate (a, 32 - n);
}
// IS_NV build: rotates the 32-bit scalar a left by n bits using the OpenCL
// rotate() built-in directly.
inline u32 rotl32_S (const u32 a, const u32 n)
u32 rotl32_S (const u32 a, const u32 n)
{
return rotate (a, n);
}
// IS_NV build: rotates the 64-bit scalar a right by n bits.
// rotate() rotates left, so the amount passed is 64 - n; the cast applies to
// the literal 64, which makes the whole subtraction 64-bit.
inline u64 rotr64_S (const u64 a, const u32 n)
u64 rotr64_S (const u64 a, const u32 n)
{
return rotate (a, (u64) 64 - n);
}
// IS_NV build: rotates the 64-bit scalar a left by n bits, implemented as a
// right rotate by 64 - n via rotr64_S().
inline u64 rotl64_S (const u64 a, const u32 n)
u64 rotl64_S (const u64 a, const u32 n)
{
return rotr64_S (a, 64 - n);
}
inline u32x swap32 (const u32x v)
u32x swap32 (const u32x v)
{
return ((v >> 24) & 0x000000ff)
| ((v >> 8) & 0x0000ff00)
@ -315,7 +299,7 @@ inline u32x swap32 (const u32x v)
| ((v << 24) & 0xff000000);
}
inline u64x swap64 (const u64x v)
u64x swap64 (const u64x v)
{
return ((v >> 56) & 0x00000000000000ff)
| ((v >> 40) & 0x000000000000ff00)
@ -327,27 +311,27 @@ inline u64x swap64 (const u64x v)
| ((v << 56) & 0xff00000000000000);
}
// IS_NV build: rotates the u32x value a right by n bits.
// The OpenCL rotate() built-in rotates left, so a right rotate by n is
// expressed as a left rotate by 32 - n.
inline u32x rotr32 (const u32x a, const u32 n)
u32x rotr32 (const u32x a, const u32 n)
{
return rotate (a, 32 - n);
}
// IS_NV build: rotates the u32x value a left by n bits using the OpenCL
// rotate() built-in directly.
inline u32x rotl32 (const u32x a, const u32 n)
u32x rotl32 (const u32x a, const u32 n)
{
return rotate (a, n);
}
// IS_NV build: rotates the u64x value a right by n bits.
// rotate() rotates left, so the amount passed is 64 - n; the cast applies to
// the literal 64, so the rotate amount is a scalar u64 rather than a u64x.
inline u64x rotr64 (const u64x a, const u32 n)
u64x rotr64 (const u64x a, const u32 n)
{
return rotate (a, (u64) 64 - n);
}
// IS_NV build: rotates the u64x value a left by n bits using the OpenCL
// rotate() built-in, with n widened to u64 first.
inline u64x rotl64 (const u64x a, const u32 n)
u64x rotl64 (const u64x a, const u32 n)
{
return rotate (a, (u64) n);
}
inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
u32x __byte_perm (const u32x a, const u32x b, const u32x c)
{
u32x r;
@ -386,7 +370,7 @@ inline u32x __byte_perm (const u32x a, const u32x b, const u32x c)
return r;
}
inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
{
u32 r;
@ -395,7 +379,7 @@ inline u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
return r;
}
inline u32x __bfe (const u32x a, const u32x b, const u32x c)
u32x __bfe (const u32x a, const u32x b, const u32x c)
{
u32x r;
@ -434,7 +418,7 @@ inline u32x __bfe (const u32x a, const u32x b, const u32x c)
return r;
}
inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
u32 __bfe_S (const u32 a, const u32 b, const u32 c)
{
u32 r;
@ -443,7 +427,7 @@ inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
return r;
}
inline u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
{
u32x r;
@ -490,7 +474,7 @@ inline u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
return r;
}
inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
u32 r;
@ -509,37 +493,37 @@ inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
#endif
#ifdef IS_GENERIC
// IS_GENERIC build: byte-swaps a 32-bit scalar. v is reinterpreted as four
// bytes, the .s3210 swizzle reverses their order, and the result is
// reinterpreted back as a uint.
inline u32 swap32_S (const u32 v)
u32 swap32_S (const u32 v)
{
return (as_uint (as_uchar4 (v).s3210));
}
// IS_GENERIC build: byte-swaps a 64-bit scalar. v is reinterpreted as eight
// bytes, the .s76543210 swizzle reverses their order, and the result is
// reinterpreted back as a ulong.
inline u64 swap64_S (const u64 v)
u64 swap64_S (const u64 v)
{
return (as_ulong (as_uchar8 (v).s76543210));
}
// IS_GENERIC build: rotates the 32-bit scalar a right by n bits.
// The OpenCL rotate() built-in rotates left, so a right rotate by n is
// expressed as a left rotate by 32 - n.
inline u32 rotr32_S (const u32 a, const u32 n)
u32 rotr32_S (const u32 a, const u32 n)
{
return rotate (a, 32 - n);
}
// IS_GENERIC build: rotates the 32-bit scalar a left by n bits using the
// OpenCL rotate() built-in directly.
inline u32 rotl32_S (const u32 a, const u32 n)
u32 rotl32_S (const u32 a, const u32 n)
{
return rotate (a, n);
}
// IS_GENERIC build: rotates the 64-bit scalar a right by n bits.
// rotate() rotates left, so the amount passed is 64 - n; the cast applies to
// the literal 64, which makes the whole subtraction 64-bit.
inline u64 rotr64_S (const u64 a, const u32 n)
u64 rotr64_S (const u64 a, const u32 n)
{
return rotate (a, (u64) 64 - n);
}
// IS_GENERIC build: rotates the 64-bit scalar a left by n bits using the
// OpenCL rotate() built-in, with n widened to u64 first.
inline u64 rotl64_S (const u64 a, const u32 n)
u64 rotl64_S (const u64 a, const u32 n)
{
return rotate (a, (u64) n);
}
inline u32x swap32 (const u32x v)
u32x swap32 (const u32x v)
{
return ((v >> 24) & 0x000000ff)
| ((v >> 8) & 0x0000ff00)
@ -547,7 +531,7 @@ inline u32x swap32 (const u32x v)
| ((v << 24) & 0xff000000);
}
inline u64x swap64 (const u64x v)
u64x swap64 (const u64x v)
{
return ((v >> 56) & 0x00000000000000ff)
| ((v >> 40) & 0x000000000000ff00)
@ -559,27 +543,27 @@ inline u64x swap64 (const u64x v)
| ((v << 56) & 0xff00000000000000);
}
// IS_GENERIC build: rotates the u32x value a right by n bits.
// The OpenCL rotate() built-in rotates left, so a right rotate by n is
// expressed as a left rotate by 32 - n.
inline u32x rotr32 (const u32x a, const u32 n)
u32x rotr32 (const u32x a, const u32 n)
{
return rotate (a, 32 - n);
}
// IS_GENERIC build: rotates the u32x value a left by n bits using the
// OpenCL rotate() built-in directly.
inline u32x rotl32 (const u32x a, const u32 n)
u32x rotl32 (const u32x a, const u32 n)
{
return rotate (a, n);
}
// IS_GENERIC build: rotates the u64x value a right by n bits.
// rotate() rotates left, so the amount passed is 64 - n; the cast applies to
// the literal 64, so the rotate amount is a scalar u64 rather than a u64x.
inline u64x rotr64 (const u64x a, const u32 n)
u64x rotr64 (const u64x a, const u32 n)
{
return rotate (a, (u64) 64 - n);
}
// IS_GENERIC build: rotates the u64x value a left by n bits using the
// OpenCL rotate() built-in, with n widened to u64 first.
inline u64x rotl64 (const u64x a, const u32 n)
u64x rotl64 (const u64x a, const u32 n)
{
return rotate (a, (u64) n);
}
inline u32x __bfe (const u32x a, const u32x b, const u32x c)
u32x __bfe (const u32x a, const u32x b, const u32x c)
{
#define BIT(x) ((u32x) (1u) << (x))
#define BIT_MASK(x) (BIT (x) - 1)
@ -592,7 +576,7 @@ inline u32x __bfe (const u32x a, const u32x b, const u32x c)
#undef BFE
}
inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
u32 __bfe_S (const u32 a, const u32 b, const u32 c)
{
#define BIT(x) (1u << (x))
#define BIT_MASK(x) (BIT (x) - 1)
@ -605,7 +589,7 @@ inline u32 __bfe_S (const u32 a, const u32 b, const u32 c)
#undef BFE
}
inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
{
#if VECT_SIZE == 1
const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
@ -638,7 +622,7 @@ inline u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
#endif
}
inline u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
{
const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);

@ -153,9 +153,6 @@
#if KERN_TYPE == 13800
#undef _unroll
#endif
#if KERN_TYPE == 14100
#undef _unroll
#endif
// nvidia specific
@ -177,6 +174,9 @@
#if KERN_TYPE == 14000
#undef _unroll
#endif
#if KERN_TYPE == 14100
#undef _unroll
#endif
#endif
#endif

File diff suppressed because it is too large Load Diff

@ -17,12 +17,12 @@
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
// Converts one ASCII hex digit to its numeric value without branching.
// The low nibble gives 0-9 for '0'-'9' and 1-6 for 'a'-'f' / 'A'-'F';
// letters have bit 6 set, so (c >> 6) is 1 for them and adds the missing 9.
// No validation is done: other input bytes produce meaningless values.
inline u8 hex_convert (const u8 c)
u8 hex_convert (const u8 c)
{
return (c & 15) + (c >> 6) * 9;
}
inline u8 hex_to_u8 (const u8 hex[2])
u8 hex_to_u8 (const u8 hex[2])
{
u8 v = 0;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -9,7 +9,7 @@
#include "inc_types.cl"
inline void generate_pw (u32 pw_buf[64], __global const cs_t *root_css_buf, __global const cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val)
void generate_pw (u32 pw_buf[64], __global const cs_t *root_css_buf, __global const cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val)
{
__global const cs_t *cs = &root_css_buf[pw_r_len];

@ -9,7 +9,7 @@
#include "inc_types.cl"
inline void generate_pw (u32 pw_buf[64], __global const cs_t *root_css_buf, __global const cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val)
void generate_pw (u32 pw_buf[64], __global const cs_t *root_css_buf, __global const cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val)
{
__global const cs_t *cs = &root_css_buf[pw_r_len];

Loading…
Cancel
Save