Prepare for more dynamic use of #pragma unroll

pull/332/head
jsteube 8 years ago
parent 34c3557d50
commit c79bed3b7d

@ -403,13 +403,17 @@
void serpent256_set_key (u32 *ks, const u32 *ukey)
{
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
ks[i] = ukey[i];
}
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 132; i++)
{
ks[i + 8] = rotl32 (ks[i + 7] ^ ks[i + 5] ^ ks[i + 3] ^ ks[i + 0] ^ 0x9e3779b9 ^ i, 11);

@ -112,7 +112,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -112,7 +112,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -110,7 +110,9 @@ void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -371,7 +371,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))
@ -430,6 +432,9 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
for (u32 i = 0; i < 25; i++)
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 j = 0; j < 16; j += 2)
{
u32x t;

@ -368,7 +368,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))
@ -427,6 +429,9 @@ void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local
for (u32 i = 0; i < 25; i++)
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 j = 0; j < 16; j += 2)
{
u32x t;

@ -1569,17 +1569,9 @@ void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, cons
for (u32 ii = 0; ii < 25; ii++)
{
#ifdef IS_NV
#if CUDA_ARCH >= 500
#else
#ifdef _unroll
#pragma unroll
#endif
#endif
#ifdef IS_AMD
#pragma unroll
#endif
for (u32 i = 0; i < 2; i++)
{
if (i) KEYSET10 else KEYSET00
@ -1705,17 +1697,9 @@ void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, cons
for (u32 ii = 0; ii < 25; ii++)
{
#ifdef IS_NV
#if CUDA_ARCH >= 500
#else
#pragma unroll
#endif
#endif
#ifdef IS_AMD
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 2; i++)
{
if (i) KEYSET10 else KEYSET00
@ -2222,7 +2206,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
u32 tmpResult = 0;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
const u32 b0 = -((search[0] >> i) & 1);
@ -2249,7 +2235,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
u32 out0[32];
u32 out1[32];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
out0[i] = out[ 0 + 31 - i];
@ -2259,7 +2247,9 @@ void m01500m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
transpose32c (out0);
transpose32c (out1);
#ifdef _unroll
#pragma unroll
#endif
for (int slice = 0; slice < 32; slice++)
{
const u32 r0 = out0[31 - slice];
@ -2676,7 +2666,9 @@ __kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
const u32 w0s = (w0 << 1) & 0xfefefefe;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0, j = 0; i < 32; i += 8, j += 7)
{
atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));

@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -116,7 +116,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], con
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -116,7 +116,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -116,7 +116,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], con
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -134,7 +134,9 @@ void sha512_transform (const u64 w[16], u64 digest[8])
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -500,7 +502,9 @@ __kernel void m01800_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
{
const u32 block_len = wpc_len[pc];
#pragma unroll 64
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0, p = block_len - 64; k < 64; k++, p++)
{
PUTCHAR64_BE (block, p, GETCHAR64_BE (l_alt_result, k));

@ -361,7 +361,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -414,7 +416,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -358,7 +358,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -411,7 +413,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -1546,21 +1546,9 @@ void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32
KXX_DECL u32 k36, k37, k38, k39, k40, k41;
KXX_DECL u32 k42, k43, k44, k45, k46, k47;
#ifdef IS_NV
#if CUDA_ARCH >= 500
#else
#ifdef _unroll
#pragma unroll
#endif
#endif
#ifdef IS_AMD
#pragma unroll
#endif
#ifdef IS_GENERIC
#pragma unroll 1
#endif
for (u32 i = 0; i < 2; i++)
{
if (i) KEYSET10 else KEYSET00
@ -2060,7 +2048,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
u32 tmpResult = 0;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
const u32 b0 = -((search[0] >> i) & 1);
@ -2087,7 +2077,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
u32 out0[32];
u32 out1[32];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
out0[i] = out[ 0 + 31 - i];
@ -2097,7 +2089,9 @@ void m03000m (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global co
transpose32c (out0);
transpose32c (out1);
#ifdef _unroll
#pragma unroll
#endif
for (int slice = 0; slice < 32; slice++)
{
const u32 r0 = out0[31 - slice];

@ -384,7 +384,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
r = rotl32 (r, 3u);
l = rotl32 (l, 3u);
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -442,7 +444,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -381,7 +381,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
r = rotl32 (r, 3u);
l = rotl32 (l, 3u);
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -439,7 +441,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -381,7 +381,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
r = rotl32 (r, 3u);
l = rotl32 (l, 3u);
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -439,7 +441,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -679,7 +679,9 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo
L0 = 0;
R0 = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 9; i++)
{
BF_ENCRYPT (L0, R0);
@ -742,7 +744,9 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo
L0 = 0;
R0 = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 9; i++)
{
BF_ENCRYPT (L0, R0);

@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -358,7 +358,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -411,7 +413,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -355,7 +355,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -408,7 +410,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -356,7 +356,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -409,7 +411,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -1180,7 +1180,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
u32x Lh[8];
u32x Ll[8];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
@ -1233,7 +1235,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
u32x Lh[8];
u32x Ll[8];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = Kh[(i + 8) & 7] >> 24;
@ -1281,7 +1285,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
Kh[7] = Lh[7];
Kl[7] = Ll[7];
#pragma unroll 8
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = stateh[(i + 8) & 7] >> 24;

@ -1177,7 +1177,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
u32x Lh[8];
u32x Ll[8];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
@ -1230,7 +1232,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
u32x Lh[8];
u32x Ll[8];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = Kh[(i + 8) & 7] >> 24;
@ -1278,7 +1282,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
Kh[7] = Lh[7];
Kl[7] = Ll[7];
#pragma unroll 8
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = stateh[(i + 8) & 7] >> 24;

@ -1178,7 +1178,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
u32x Lh[8];
u32x Ll[8];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = stateh[(i + 8) & 7] >> 24;
@ -1231,7 +1233,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
u32x Lh[8];
u32x Ll[8];
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = Kh[(i + 8) & 7] >> 24;
@ -1279,7 +1283,9 @@ void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 (*s_Ch)[2
Kh[7] = Lh[7];
Kl[7] = Ll[7];
#pragma unroll 8
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 8; i++)
{
const u32x Lp0 = stateh[(i + 8) & 7] >> 24;

@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -115,7 +115,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -1175,7 +1175,9 @@ void whirlpool_transform_last (u32 dgst[16], __local u32 (*s_Ch)[256], __local u
u32 i;
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u32 Lp0 = Kh[(i + 8) & 7] >> 24;
@ -1223,7 +1225,9 @@ void whirlpool_transform_last (u32 dgst[16], __local u32 (*s_Ch)[256], __local u
Kh[7] = Lh[7];
Kl[7] = Ll[7];
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u32 Lp0 = stateh[(i + 8) & 7] >> 24;
@ -1371,7 +1375,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
u32 i;
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u32 Lp0 = Kh[(i + 8) & 7] >> 24;
@ -1419,7 +1425,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
Kh[7] = Lh[7];
Kl[7] = Ll[7];
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u32 Lp0 = stateh[(i + 8) & 7] >> 24;

@ -1172,7 +1172,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
u32 i;
#pragma unroll 8
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u8 Lp0 = Kh[(i + 8) & 7] >> 24;
@ -1220,7 +1222,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
Kh[7] = Lh[7];
Kl[7] = Ll[7];
#pragma unroll 8
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u8 Lp0 = stateh[(i + 8) & 7] >> 24;

@ -1172,7 +1172,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
u32 i;
#pragma unroll 8
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u8 Lp0 = Kh[(i + 8) & 7] >> 24;
@ -1220,7 +1222,9 @@ void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 (*s_Ch)[256
Kh[7] = Lh[7];
Kl[7] = Ll[7];
#pragma unroll 8
#ifdef _unroll
#pragma unroll
#endif
for (i = 0; i < 8; i++)
{
const u8 Lp0 = stateh[(i + 8) & 7] >> 24;

@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -114,7 +114,9 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -716,7 +716,9 @@ void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __local u32 *
rek[2] = userkey[2];
rek[3] = userkey[3];
#pragma unroll 10
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0, j = 0; i < 10; i += 1, j += 4)
{
u32 temp = rek[j + 3];
@ -1391,7 +1393,9 @@ __kernel void m06600_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf
AES128_ExpandKey (ukey, rek, s_te0, s_te1, s_te2, s_te3, s_te4);
#pragma unroll KEYLEN
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i];
AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);

@ -1075,7 +1075,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -1541,7 +1543,9 @@ __kernel void m06800_comp (__global pw_t *pws, __global kernel_rule_t *rules_buf
u32 rdk[KEYLEN];
#pragma unroll 60
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i];
AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4);

@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -112,7 +112,9 @@ void sha256_transform (const u32 w[16], u32 digest[8])
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -1303,7 +1305,9 @@ void sha256_transform (const u32 w[16], u32 digest[8])
ROUND_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -1686,7 +1690,9 @@ __kernel void m07400_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
if (j1)
{
#pragma unroll 32
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0, p = block_len - 32; k < 32; k++, p++)
{
PUTCHAR32_BE (block, p, GETCHAR32_BE (alt_result, k));

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -350,7 +350,9 @@ __kernel void m07800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
digest[3] = SHA1M_D;
digest[4] = SHA1M_E;
#pragma unroll 32
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++) final[i] = 0;
final[0] = w0[0];
@ -594,7 +596,9 @@ __kernel void m07800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
digest[3] = SHA1M_D;
digest[4] = SHA1M_E;
#pragma unroll 32
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++) final[i] = 0;
final[0] = w0[0];

@ -408,7 +408,9 @@ __kernel void m07800_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
digest[3] = SHA1M_D;
digest[4] = SHA1M_E;
#pragma unroll 32
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++) final[i] = 0;
final[0] = w0[0];
@ -712,7 +714,9 @@ __kernel void m07800_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
digest[3] = SHA1M_D;
digest[4] = SHA1M_E;
#pragma unroll 32
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++) final[i] = 0;
final[0] = w0[0];

@ -324,7 +324,9 @@ void m07800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl
digest[3] = SHA1M_D;
digest[4] = SHA1M_E;
#pragma unroll 32
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++) final[i] = 0;
final[0] = swap32 (w0[0]);
@ -536,7 +538,9 @@ void m07800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl
digest[3] = SHA1M_D;
digest[4] = SHA1M_E;
#pragma unroll 32
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++) final[i] = 0;
final[0] = swap32 (w0[0]);

@ -114,7 +114,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -119,7 +119,9 @@ void sha256_transform (u32x digest[8], const u32x w[16])
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -168,7 +170,9 @@ void sha256_transform_z (u32x digest[8])
ROUND_STEP_Z (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_STEP_Z (i);
@ -217,7 +221,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w)
ROUND_STEP_S (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_STEP_S (i);
@ -270,7 +276,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
w_s1[15] = 0 | salt_buf0 >> 16;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@ -281,7 +289,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w_s2[ 2] = salt_buf2 << 16 | 0;
w_s2[15] = (510 + 8) * 8;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
@ -437,7 +447,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
w_s1[15] = 0 | salt_buf0 >> 16;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@ -448,7 +460,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w_s2[ 2] = salt_buf2 << 16 | 0;
w_s2[15] = (510 + 8) * 8;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);

@ -117,7 +117,9 @@ void sha256_transform (u32x digest[8], const u32x w[16])
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -166,7 +168,9 @@ void sha256_transform_z (u32x digest[8])
ROUND_STEP_Z (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_STEP_Z (i);
@ -215,7 +219,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w)
ROUND_STEP_S (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_STEP_S (i);
@ -268,7 +274,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
w_s1[15] = 0 | salt_buf0 >> 16;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@ -279,7 +287,9 @@ __kernel void m08000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w_s2[ 2] = salt_buf2 << 16 | 0;
w_s2[15] = (510 + 8) * 8;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
@ -489,7 +499,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
{
w_s1[15] = 0 | salt_buf0 >> 16;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@ -500,7 +512,9 @@ __kernel void m08000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf,
w_s2[ 2] = salt_buf2 << 16 | 0;
w_s2[15] = (510 + 8) * 8;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);

@ -117,7 +117,9 @@ void sha256_transform (u32x digest[8], const u32x w[16])
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -166,7 +168,9 @@ void sha256_transform_z (u32x digest[8])
ROUND_STEP_Z (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_STEP_Z (i);
@ -215,7 +219,9 @@ void sha256_transform_s (u32x digest[8], __local u32 *w)
ROUND_STEP_S (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_STEP_S (i);
@ -265,7 +271,9 @@ void m08000m (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
{
w_s1[15] = 0 | salt_buf0 >> 16;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@ -276,7 +284,9 @@ void m08000m (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
w_s2[ 2] = salt_buf2 << 16 | 0;
w_s2[15] = (510 + 8) * 8;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);
@ -383,7 +393,9 @@ void m08000s (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
{
w_s1[15] = 0 | salt_buf0 >> 16;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s1[i] = SHA256_EXPAND_S (w_s1[i - 2], w_s1[i - 7], w_s1[i - 15], w_s1[i - 16]);
@ -394,7 +406,9 @@ void m08000s (__local u32 *w_s1, __local u32 *w_s2, u32 w[16], const u32 pw_len,
w_s2[ 2] = salt_buf2 << 16 | 0;
w_s2[15] = (510 + 8) * 8;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i++)
{
w_s2[i] = SHA256_EXPAND_S (w_s2[i - 2], w_s2[i - 7], w_s2[i - 15], w_s2[i - 16]);

@ -134,7 +134,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -321,7 +323,9 @@ void sha512_transform (const u64 w[16], u64 dgst[8])
ROUND512_STEP (0);
//#pragma unroll
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)
{
ROUND512_EXPAND (); ROUND512_STEP (i);

@ -410,7 +410,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -463,7 +465,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -407,7 +407,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -460,7 +462,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -408,7 +408,9 @@ void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __l
u32x r = data[0];
u32x l = data[1];
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32x u;
@ -461,7 +463,9 @@ void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32
c = c & 0x0fffffff;
#pragma unroll 16
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))

@ -82,7 +82,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
{
u32 s = 48;
#pragma unroll 12
#ifdef _unroll
#pragma unroll
#endif
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
@ -104,7 +106,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
u32x c;
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 4; i++)
{
t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff);

@ -79,7 +79,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
{
u32 s = 48;
#pragma unroll 12
#ifdef _unroll
#pragma unroll
#endif
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
@ -101,7 +103,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
u32x c;
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 4; i++)
{
t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff);

@ -79,7 +79,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
{
u32 s = 48;
#pragma unroll 12
#ifdef _unroll
#pragma unroll
#endif
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
@ -101,7 +103,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
u32x c;
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 4; i++)
{
t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff);

@ -94,7 +94,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
{
u32 s = 48;
#pragma unroll 12
#ifdef _unroll
#pragma unroll
#endif
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
@ -116,7 +118,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
u32x c;
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 4; i++)
{
t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff);

@ -91,7 +91,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
{
u32 s = 48;
#pragma unroll 12
#ifdef _unroll
#pragma unroll
#endif
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
@ -113,7 +115,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
u32x c;
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 4; i++)
{
t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff);

@ -93,7 +93,9 @@ void lotus_mix (u32x *in, __local u32 *s_lotus_magic_table)
{
u32 s = 48;
#pragma unroll 12
#ifdef _unroll
#pragma unroll
#endif
for (int j = 0; j < 12; j++)
{
u32x tmp_in = in[j];
@ -115,7 +117,9 @@ void lotus_transform_password (u32x in[4], u32x out[4], __local u32 *s_lotus_mag
u32x c;
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 4; i++)
{
t ^= (in[i] >> 0) & 0xff; c = BOX1 (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff);

@ -1212,7 +1212,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);

@ -110,7 +110,9 @@ void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const
ROUND_STEP (0);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 64; i += 16)
{
ROUND_EXPAND (); ROUND_STEP (i);
@ -705,7 +707,9 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V)
const u32 x = gid % xSIZE;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
@ -743,7 +747,9 @@ void scrypt_smix (uint4 *X, uint4 *T, const u32 phy, __global uint4 *V)
salsa_r (X);
}
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
@ -910,12 +916,16 @@ __kernel void m08900_loop (__global pw_t *pws, __global kernel_rule_t *rules_buf
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = swap32_4 (tmps[gid].P[z]);
scrypt_smix (X, T, scrypt_phy, d_scryptV_buf);
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = swap32_4 (X[z]);
#if SCRYPT_P >= 1

@ -75,7 +75,9 @@ void lotus_mix (u32 *in, __local u32 *s_lotus_magic_table)
{
u32 s = 48;
#pragma unroll 12
#ifdef _unroll
#pragma unroll
#endif
for (int j = 0; j < 12; j++)
{
u32 tmp_in = in[j];
@ -97,7 +99,9 @@ void lotus_transform_password (u32 in[4], u32 out[4], __local u32 *s_lotus_magic
u32 c;
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 4; i++)
{
t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff);

@ -1073,10 +1073,7 @@ void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const
ROUND_STEP (0);
#ifdef IS_AMD
// #pragma unroll
// breaks compiler
#else
#ifdef _unroll
#pragma unroll
#endif
for (int i = 16; i < 80; i += 16)

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -44,7 +44,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -90,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -44,7 +44,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -90,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -45,7 +45,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -91,7 +93,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -47,7 +47,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
*ptr++ = v; v += a;
@ -93,7 +95,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4])
{
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -61,7 +61,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
ptr[i] = v; v += a;
@ -75,7 +77,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u32 j = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 255; i += 5)
{
j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@ -90,7 +94,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
{
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
ptr[i] = v; v += a;
@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u32 j = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 255; i += 5)
{
j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
{
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
ptr[i] = v; v += a;
@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u32 j = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 255; i += 5)
{
j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
{
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -61,7 +61,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
ptr[i] = v; v += a;
@ -75,7 +77,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u32 j = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 255; i += 5)
{
j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@ -90,7 +94,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
{
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
ptr[i] = v; v += a;
@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u32 j = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 255; i += 5)
{
j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
{
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

@ -59,7 +59,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
__local u32 *ptr = (__local u32 *) rc4_key->S;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 64; i++)
{
ptr[i] = v; v += a;
@ -73,7 +75,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u32 j = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 255; i += 5)
{
j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j);
@ -88,7 +92,9 @@ void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 data[4])
u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 *in, u32 out[4])
{
#pragma unroll 4
#ifdef _unroll
#pragma unroll
#endif
for (u32 k = 0; k < 4; k++)
{
u32 xor4 = 0;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save