From b2193e1af4d1134694001538ec5c69b0e285e7f9 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 23 Jun 2021 08:36:17 +0200 Subject: [PATCH] Add rc4_next_16_global() and fix address space of edata buffer in -m 13100 and -m18200 --- OpenCL/inc_cipher_rc4.cl | 66 +++++++++++++++++++++++++++++++++++ OpenCL/inc_cipher_rc4.h | 9 ++--- OpenCL/m13100_a0-optimized.cl | 32 ++++++++--------- OpenCL/m13100_a0-pure.cl | 32 ++++++++--------- OpenCL/m13100_a1-optimized.cl | 32 ++++++++--------- OpenCL/m13100_a1-pure.cl | 32 ++++++++--------- OpenCL/m13100_a3-optimized.cl | 32 ++++++++--------- OpenCL/m13100_a3-pure.cl | 32 ++++++++--------- OpenCL/m18200_a0-optimized.cl | 30 ++++++++-------- OpenCL/m18200_a0-pure.cl | 30 ++++++++-------- OpenCL/m18200_a1-optimized.cl | 30 ++++++++-------- OpenCL/m18200_a1-pure.cl | 30 ++++++++-------- OpenCL/m18200_a3-optimized.cl | 30 ++++++++-------- OpenCL/m18200_a3-pure.cl | 30 ++++++++-------- 14 files changed, 257 insertions(+), 190 deletions(-) diff --git a/OpenCL/inc_cipher_rc4.cl b/OpenCL/inc_cipher_rc4.cl index 4a25a5a9d..6180f3f19 100644 --- a/OpenCL/inc_cipher_rc4.cl +++ b/OpenCL/inc_cipher_rc4.cl @@ -267,3 +267,69 @@ DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, return b; } + +DECLSPEC u8 rc4_next_16_global (LOCAL_AS u32 *S, const u8 i, const u8 j, GLOBAL_AS const u32 *in, u32 *out) +{ + u8 a = i; + u8 b = j; + + #ifdef _unroll + #pragma unroll + #endif + for (int k = 0; k < 4; k++) + { + u32 xor4 = 0; + + u32 tmp; + + u8 idx; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 0; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 8; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 16; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 24; + + out[k] = in[k] ^ xor4; + } + + return b; +} diff --git a/OpenCL/inc_cipher_rc4.h b/OpenCL/inc_cipher_rc4.h index 5c60a78b7..4574d95d1 100644 --- a/OpenCL/inc_cipher_rc4.h +++ b/OpenCL/inc_cipher_rc4.h @@ -10,9 +10,10 @@ DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k); DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v); DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v); -DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key); -DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key); -DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j); -DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out); +DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key); +DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key); +DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j); +DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out); +DECLSPEC u8 rc4_next_16_global (LOCAL_AS u32 *S, const u8 i, const u8 j, GLOBAL_AS const u32 *in, u32 *out); #endif // _INC_CIPHER_RC4_H diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl index b8bf46c0e..61fe57484 100644 --- a/OpenCL/m13100_a0-optimized.cl +++ b/OpenCL/m13100_a0-optimized.cl @@ -134,11 +134,11 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; @@ -180,10 +180,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -207,7 +207,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -220,8 +220,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -234,9 +234,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -249,10 +249,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl index 83b519aef..70462fd99 100644 --- a/OpenCL/m13100_a0-pure.cl +++ b/OpenCL/m13100_a0-pure.cl @@ -48,11 +48,11 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; @@ -93,10 +93,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -120,31 +120,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl index 64ec8c79a..5c103b1f0 100644 --- a/OpenCL/m13100_a1-optimized.cl +++ b/OpenCL/m13100_a1-optimized.cl @@ -132,11 +132,11 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; @@ -178,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -205,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -218,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -232,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -247,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl index 7f9fe5d4e..7ae0386dd 100644 --- a/OpenCL/m13100_a1-pure.cl +++ b/OpenCL/m13100_a1-pure.cl @@ -46,11 +46,11 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; @@ -91,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -118,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl index 45c1afeda..c93673d7e 100644 --- a/OpenCL/m13100_a3-optimized.cl +++ b/OpenCL/m13100_a3-optimized.cl @@ -132,11 +132,11 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; @@ -178,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -205,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -218,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -232,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -247,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl index 7e697aeaa..e6230b386 100644 --- a/OpenCL/m13100_a3-pure.cl +++ b/OpenCL/m13100_a3-pure.cl @@ -46,11 +46,11 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; @@ -91,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -118,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } diff --git a/OpenCL/m18200_a0-optimized.cl b/OpenCL/m18200_a0-optimized.cl index 3727bd102..9f0e8ef20 100644 --- a/OpenCL/m18200_a0-optimized.cl +++ b/OpenCL/m18200_a0-optimized.cl @@ -133,7 +133,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (S, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && @@ -178,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -205,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -218,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -232,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -247,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); diff --git a/OpenCL/m18200_a0-pure.cl b/OpenCL/m18200_a0-pure.cl index 92b4019c7..d8b8abe8b 100644 --- a/OpenCL/m18200_a0-pure.cl +++ b/OpenCL/m18200_a0-pure.cl @@ -47,7 +47,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (S, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && @@ -91,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -118,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } diff --git a/OpenCL/m18200_a1-optimized.cl b/OpenCL/m18200_a1-optimized.cl index 122e232bd..051b6c64f 100644 --- a/OpenCL/m18200_a1-optimized.cl +++ b/OpenCL/m18200_a1-optimized.cl @@ -131,7 +131,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (S, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && @@ -176,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -203,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -216,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -230,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -245,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); diff --git a/OpenCL/m18200_a1-pure.cl b/OpenCL/m18200_a1-pure.cl index 3dbe70247..de5cbc279 100644 --- a/OpenCL/m18200_a1-pure.cl +++ b/OpenCL/m18200_a1-pure.cl @@ -45,7 +45,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (S, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && @@ -89,10 +89,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -116,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } diff --git a/OpenCL/m18200_a3-optimized.cl b/OpenCL/m18200_a3-optimized.cl index 659aee575..e4398a99a 100644 --- a/OpenCL/m18200_a3-optimized.cl +++ b/OpenCL/m18200_a3-optimized.cl @@ -131,7 +131,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (S, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && @@ -176,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -203,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -216,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -230,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -245,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); diff --git a/OpenCL/m18200_a3-pure.cl b/OpenCL/m18200_a3-pure.cl index a6dbb720a..5bb9c6e15 100644 --- a/OpenCL/m18200_a3-pure.cl +++ b/OpenCL/m18200_a3-pure.cl @@ -45,7 +45,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (S, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && @@ -89,10 +89,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -116,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 if (edata2_left < 16) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); }