diff --git a/OpenCL/inc_cipher_rc4.cl b/OpenCL/inc_cipher_rc4.cl index 31f1be757..9437ceca5 100644 --- a/OpenCL/inc_cipher_rc4.cl +++ b/OpenCL/inc_cipher_rc4.cl @@ -106,16 +106,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v) #endif -DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = GET_KEY8 (S, i); - SET_KEY8 (S, i, GET_KEY8 (S, j)); - SET_KEY8 (S, j, tmp); -} - -DECLSPEC void rc4_init_16 (LOCAL_AS u32 *S, const u32 *data) +DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key) { u32 v = 0x03020100; u32 a = 0x04040404; @@ -128,11 +119,11 @@ DECLSPEC void rc4_init_16 (LOCAL_AS u32 *S, const u32 *data) SET_KEY32 (S, i, v); v += a; } - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; + const u32 d0 = key[0] >> 0; + const u32 d1 = key[0] >> 8; + const u32 d2 = key[0] >> 16; + const u32 d3 = key[0] >> 24; + const u32 d4 = key[1] >> 0; u32 j = 0; @@ -151,8 +142,71 @@ DECLSPEC void rc4_init_16 (LOCAL_AS u32 *S, const u32 *data) j += GET_KEY8 (S, 255) + d0; rc4_swap (S, 255, j); } -DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) +DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key) { + u32 v = 0x03020100; + u32 a = 0x04040404; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 64; i++) + { + SET_KEY32 (S, i, v); v += a; + } + + u32 j = 0; + + for (u32 i = 0; i < 16; i++) + { + u32 idx = i * 16; + + u32 v; + + v = key[0]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + + v = key[1]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + + v = key[2]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + + v = key[3]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + } +} + +DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j) +{ + u8 tmp; + + tmp = GET_KEY8 (S, i); + SET_KEY8 (S, i, GET_KEY8 (S, j)); + SET_KEY8 (S, j, tmp); +} + +DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out) +{ + u8 src = i; + u8 dst = j; + #ifdef _unroll #pragma unroll #endif @@ -162,44 +216,54 @@ DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, u8 i, u8 j, CONSTANT_AS u32a *in, u32 u8 idx; - i += 1; - j += GET_KEY8 (S, i); + u32 r; + + src += 1; + dst += GET_KEY8 (S, src); + + rc4_swap (S, src, dst); + + idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); + + r = GET_KEY8 (S, idx); + + xor4 |= r << 0; - rc4_swap (S, i, j); + src += 1; + dst += GET_KEY8 (S, src); - idx = GET_KEY8 (S, i) + GET_KEY8 (S, j); + rc4_swap (S, src, dst); - xor4 |= GET_KEY8 (S, idx) << 0; + idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); - i += 1; - j += GET_KEY8 (S, i); + r = GET_KEY8 (S, idx); - rc4_swap (S, i, j); + xor4 |= r << 8; - idx = GET_KEY8 (S, i) + GET_KEY8 (S, j); + src += 1; + dst += GET_KEY8 (S, src); - xor4 |= GET_KEY8 (S, idx) << 8; + rc4_swap (S, src, dst); - i += 1; - j += GET_KEY8 (S, i); + idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); - rc4_swap (S, i, j); + r = GET_KEY8 (S, idx); - idx = GET_KEY8 (S, i) + GET_KEY8 (S, j); + xor4 |= r << 16; - xor4 |= GET_KEY8 (S, idx) << 16; + src += 1; + dst += GET_KEY8 (S, src); - i += 1; - j += GET_KEY8 (S, i); + rc4_swap (S, src, dst); - rc4_swap (S, i, j); + idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); - idx = GET_KEY8 (S, i) + GET_KEY8 (S, j); + r = GET_KEY8 (S, idx); - xor4 |= GET_KEY8 (S, idx) << 24; + xor4 |= r << 24; out[k] = in[k] ^ xor4; } - return j; + return dst; } diff --git a/OpenCL/inc_cipher_rc4.h b/OpenCL/inc_cipher_rc4.h index 3348c4bb8..2feb8176a 100644 --- a/OpenCL/inc_cipher_rc4.h +++ b/OpenCL/inc_cipher_rc4.h @@ -10,8 +10,9 @@ DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k); DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v); DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v); -DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j); -DECLSPEC void rc4_init_16 (LOCAL_AS u32 *S, const u32 *data); -DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out); +DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key); +DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key); +DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j); +DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out); #endif // _INC_CIPHER_RC4_H diff --git a/OpenCL/m09700_a0-optimized.cl b/OpenCL/m09700_a0-optimized.cl index 2f61ea2ef..55d8f3f55 100644 --- a/OpenCL/m09700_a0-optimized.cl +++ b/OpenCL/m09700_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -26,129 +27,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void gen336 (u32 *digest_pre, u32 *salt_buf, u32 *digest) { u32 digest_t0[2]; @@ -527,9 +405,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -636,11 +512,11 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) key[2] = digest[2]; key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -666,7 +542,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -714,9 +590,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -835,11 +709,11 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) key[2] = digest[2]; key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -865,7 +739,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09700_a1-optimized.cl b/OpenCL/m09700_a1-optimized.cl index 1f4c9d141..67f93ed7f 100644 --- a/OpenCL/m09700_a1-optimized.cl +++ b/OpenCL/m09700_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -24,129 +25,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void gen336 (u32 *digest_pre, u32 *salt_buf, u32 *digest) { u32 digest_t0[2]; @@ -525,9 +403,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -692,11 +568,11 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) key[2] = digest[2]; key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -722,7 +598,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -770,9 +646,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -949,11 +823,11 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) key[2] = digest[2]; key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -979,7 +853,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09700_a3-optimized.cl b/OpenCL/m09700_a3-optimized.cl index 84fbc8929..2b360e251 100644 --- a/OpenCL/m09700_a3-optimized.cl +++ b/OpenCL/m09700_a3-optimized.cl @@ -10,6 +10,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -21,130 +22,7 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09700m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -153,12 +31,6 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ @@ -514,11 +386,11 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -544,13 +416,13 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09700s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -559,12 +431,6 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ @@ -932,11 +798,11 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -962,7 +828,7 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } @@ -1012,9 +878,9 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09700m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1061,9 +927,9 @@ KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09700m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_m16 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1114,9 +980,9 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09700s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1163,9 +1029,9 @@ KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09700s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_s16 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git a/OpenCL/m09710_a0-optimized.cl b/OpenCL/m09710_a0-optimized.cl index 3c3b49de5..02c357382 100644 --- a/OpenCL/m09710_a0-optimized.cl +++ b/OpenCL/m09710_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -26,129 +27,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) { /** @@ -183,9 +61,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -243,11 +119,11 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -273,7 +149,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -321,9 +197,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -393,11 +267,11 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -423,7 +297,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09710_a1-optimized.cl b/OpenCL/m09710_a1-optimized.cl index 80625f90c..eca18ee6c 100644 --- a/OpenCL/m09710_a1-optimized.cl +++ b/OpenCL/m09710_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -24,129 +25,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) { /** @@ -181,9 +59,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -287,11 +163,11 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -317,7 +193,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -365,9 +241,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -483,11 +357,11 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -513,7 +387,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09710_a3-optimized.cl b/OpenCL/m09710_a3-optimized.cl index 015a579f3..ef5dfe3f8 100644 --- a/OpenCL/m09710_a3-optimized.cl +++ b/OpenCL/m09710_a3-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -24,130 +25,7 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09710m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -156,12 +34,6 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ @@ -220,11 +92,11 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -250,13 +122,13 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09710s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -265,12 +137,6 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ @@ -341,11 +207,11 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -371,7 +237,7 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } @@ -421,9 +287,9 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09710m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09710m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09710_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -478,9 +344,9 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09710s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09710s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09710_s08 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git a/OpenCL/m10400_a0-optimized.cl b/OpenCL/m10400_a0-optimized.cl index a778d5bfc..368ce44c6 100644 --- a/OpenCL/m10400_a0-optimized.cl +++ b/OpenCL/m10400_a0-optimized.cl @@ -18,18 +18,6 @@ #include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -81,6 +69,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m04 (KERN_ATTR_RULES_ const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -207,7 +211,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m04 (KERN_ATTR_RULES_ digest[2] = 0; digest[3] = 0; - rc4_init_16 (S, digest); + rc4_init_40 (S, digest); u32 out[4]; @@ -255,6 +259,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s04 (KERN_ATTR_RULES_ const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -393,7 +413,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s04 (KERN_ATTR_RULES_ digest[2] = 0; digest[3] = 0; - rc4_init_16 (S, digest); + rc4_init_40 (S, digest); u32 out[4]; diff --git a/OpenCL/m10400_a1-optimized.cl b/OpenCL/m10400_a1-optimized.cl index dbacf9af5..df63ad87f 100644 --- a/OpenCL/m10400_a1-optimized.cl +++ b/OpenCL/m10400_a1-optimized.cl @@ -16,18 +16,6 @@ #include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -79,6 +67,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m04 (KERN_ATTR_ESALT const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -265,7 +269,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m04 (KERN_ATTR_ESALT digest[2] = 0; digest[3] = 0; - rc4_init_16 (S, digest); + rc4_init_40 (S, digest); u32 out[4]; @@ -313,6 +317,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s04 (KERN_ATTR_ESALT const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -511,7 +531,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s04 (KERN_ATTR_ESALT digest[2] = 0; digest[3] = 0; - rc4_init_16 (S, digest); + rc4_init_40 (S, digest); u32 out[4]; diff --git a/OpenCL/m10400_a3-optimized.cl b/OpenCL/m10400_a3-optimized.cl index fac27fa93..150466322 100644 --- a/OpenCL/m10400_a3-optimized.cl +++ b/OpenCL/m10400_a3-optimized.cl @@ -16,18 +16,6 @@ #include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -58,6 +46,22 @@ DECLSPEC void m10400m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * U_buf */ @@ -199,7 +203,7 @@ DECLSPEC void m10400m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons digest[2] = 0; digest[3] = 0; - rc4_init_16 (S, digest); + rc4_init_40 (S, digest); u32 out[4]; @@ -218,6 +222,22 @@ DECLSPEC void m10400s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * U_buf */ @@ -371,7 +391,7 @@ DECLSPEC void m10400s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons digest[2] = 0; digest[3] = 0; - rc4_init_16 (S, digest); + rc4_init_40 (S, digest); u32 out[4]; diff --git a/OpenCL/m10410_a0-optimized.cl b/OpenCL/m10410_a0-optimized.cl index 9deb9faaf..026cf0131 100644 --- a/OpenCL/m10410_a0-optimized.cl +++ b/OpenCL/m10410_a0-optimized.cl @@ -18,18 +18,6 @@ #include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -81,6 +69,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_RULES_ const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -104,7 +108,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_RULES_ * pdf */ - rc4_init_16 (S, w0); + rc4_init_40 (S, w0); u32 out[4]; @@ -152,6 +156,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_RULES_ const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -187,7 +207,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_RULES_ * pdf */ - rc4_init_16 (S, w0); + rc4_init_40 (S, w0); u32 out[4]; diff --git a/OpenCL/m10410_a1-optimized.cl b/OpenCL/m10410_a1-optimized.cl index 16836b2c7..831007d0b 100644 --- a/OpenCL/m10410_a1-optimized.cl +++ b/OpenCL/m10410_a1-optimized.cl @@ -16,18 +16,6 @@ #include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -79,6 +67,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_ESALT const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -171,7 +175,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_ESALT * pdf */ - rc4_init_16 (S, w0); + rc4_init_40 (S, w0); u32 out[4]; @@ -219,6 +223,22 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_ESALT const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ @@ -323,7 +343,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_ESALT * pdf */ - rc4_init_16 (S, w0); + rc4_init_40 (S, w0); u32 out[4]; diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl index fa1cb6272..98cc5478f 100644 --- a/OpenCL/m10410_a3-optimized.cl +++ b/OpenCL/m10410_a3-optimized.cl @@ -16,18 +16,6 @@ #include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -58,6 +46,22 @@ DECLSPEC void m10410m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * loop */ @@ -76,7 +80,7 @@ DECLSPEC void m10410m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons * pdf */ - rc4_init_16 (S, w0); + rc4_init_40 (S, w0); u32 out[4]; @@ -95,6 +99,22 @@ DECLSPEC void m10410s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * digest */ @@ -125,7 +145,7 @@ DECLSPEC void m10410s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, cons * pdf */ - rc4_init_16 (S, w0); + rc4_init_40 (S, w0); u32 out[4]; diff --git a/docs/changes.txt b/docs/changes.txt index 2d288d96f..ccba2a476 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -30,7 +30,7 @@ - AES Crypt Plugin: Reduced max password length from 256 to 128 which improved performance by 22% - RAR3-p (Compressed): Fix workaround in unrar library in AES constant table generation to enable multi-threading support - CRC32: Prevent decompression of data used in CRC32 calculation on host. This leads to false negatives with TrueCrypt/VeraCrypt keyfiles -- PDF: Update -m 10400 and -m 10410 to new RC4 crypto library code, improving performance by 20% or more +- RC4: Update -m 97x0 and -m 104x0 to new RC4 crypto library code, improving performance by 20% or more ## ## Technical diff --git a/src/modules/module_09700.c b/src/modules/module_09700.c index af8b5bcca..f5e2f1138 100644 --- a/src/modules/module_09700.c +++ b/src/modules/module_09700.c @@ -57,25 +57,42 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$"; static const char *SIGNATURE_OLDOFFICE0 = "$oldoffice$0"; static const char *SIGNATURE_OLDOFFICE1 = "$oldoffice$1"; -u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { - const u64 esalt_size = (const u64) sizeof (oldoffice01_t); - - return esalt_size; + char *jit_build_options = NULL; + + u32 native_threads = 0; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else + { + native_threads = 32; + } + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + + return jit_build_options; } -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; // RC4 - - return kernel_threads_min; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 64; // RC4 + const u64 esalt_size = (const u64) sizeof (oldoffice01_t); - return kernel_threads_max; + return esalt_size; } u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -255,14 +272,14 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_09710.c b/src/modules/module_09710.c index 5c31456a2..250bc3863 100644 --- a/src/modules/module_09710.c +++ b/src/modules/module_09710.c @@ -57,25 +57,42 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$"; static const char *SIGNATURE_OLDOFFICE0 = "$oldoffice$0"; static const char *SIGNATURE_OLDOFFICE1 = "$oldoffice$1"; -u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { - const u64 esalt_size = (const u64) sizeof (oldoffice01_t); - - return esalt_size; + char *jit_build_options = NULL; + + u32 native_threads = 0; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else + { + native_threads = 32; + } + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + + return jit_build_options; } -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; // RC4 - - return kernel_threads_min; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 64; // RC4 + const u64 esalt_size = (const u64) sizeof (oldoffice01_t); - return kernel_threads_max; + return esalt_size; } u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -274,14 +291,14 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type;