From 8901e657a52102e732aaf05be59af68918c9a1c5 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Mon, 31 May 2021 08:28:13 +0200 Subject: [PATCH] RC4 Kernels: Improved performance by 20%+ for hash-modes Kerberos 5 (etype 23), MS Office (<= 2003) and PDF (<= 1.6) by using new RC4 code --- OpenCL/inc_cipher_rc4.cl | 92 ++++++++-------- OpenCL/inc_cipher_rc4.h | 6 +- OpenCL/m09800_a0-optimized.cl | 156 +++------------------------- OpenCL/m09800_a1-optimized.cl | 156 +++------------------------- OpenCL/m09800_a3-optimized.cl | 188 +++++---------------------------- OpenCL/m09810_a0-optimized.cl | 144 ++------------------------ OpenCL/m09810_a1-optimized.cl | 144 ++------------------------ OpenCL/m09810_a3-optimized.cl | 176 ++++--------------------------- OpenCL/m09820_a0-optimized.cl | 144 ++------------------------ OpenCL/m09820_a1-optimized.cl | 144 ++------------------------ OpenCL/m09820_a3-optimized.cl | 176 ++++--------------------------- OpenCL/m10500-pure.cl | 166 +++-------------------------- OpenCL/m13100_a0-optimized.cl | 174 +++++-------------------------- OpenCL/m13100_a0-pure.cl | 174 +++++-------------------------- OpenCL/m13100_a1-optimized.cl | 174 +++++-------------------------- OpenCL/m13100_a1-pure.cl | 174 +++++-------------------------- OpenCL/m13100_a3-optimized.cl | 190 ++++++---------------------------- OpenCL/m13100_a3-pure.cl | 174 +++++-------------------------- OpenCL/m18200_a0-optimized.cl | 172 ++++-------------------------- OpenCL/m18200_a0-pure.cl | 172 ++++-------------------------- OpenCL/m18200_a1-optimized.cl | 172 ++++-------------------------- OpenCL/m18200_a1-pure.cl | 172 ++++-------------------------- OpenCL/m18200_a3-optimized.cl | 188 ++++++--------------------------- OpenCL/m18200_a3-pure.cl | 172 ++++-------------------------- OpenCL/m25400-pure.cl | 162 +++-------------------------- docs/changes.txt | 4 +- src/modules/module_09800.c | 45 +++++--- src/modules/module_09810.c | 45 +++++--- src/modules/module_09820.c | 33 +++++- 
src/modules/module_10500.c | 48 ++++----- src/modules/module_13100.c | 61 +++++------ src/modules/module_18200.c | 45 ++++---- src/modules/module_25400.c | 55 +++++----- 33 files changed, 709 insertions(+), 3589 deletions(-) diff --git a/OpenCL/inc_cipher_rc4.cl b/OpenCL/inc_cipher_rc4.cl index 9437ceca5..4a25a5a9d 100644 --- a/OpenCL/inc_cipher_rc4.cl +++ b/OpenCL/inc_cipher_rc4.cl @@ -8,21 +8,21 @@ // Pattern linear -DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k) +DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k) { LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S; return S8[k]; } -DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v) +DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v) { LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S; S8[k] = v; } -DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v) +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v) { S[k] = v; } @@ -74,7 +74,7 @@ DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v) #define KEY8(t,k) (((k) & 3) + (((k) / 4) * 128) + (((t) & 31) * 4) + (((t) / 32) * 8192)) -DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k) +DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k) { const u64 lid = get_local_id (0); @@ -83,7 +83,7 @@ DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k) return S8[KEY8 (lid, k)]; } -DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v) +DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v) { const u64 lid = get_local_id (0); @@ -94,7 +94,7 @@ DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v) #define KEY32(t,k) (((k) * 32) + ((t) & 31) + (((t) / 32) * 2048)) -DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v) +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v) { const u64 lid = get_local_id (0); @@ -114,18 +114,18 @@ DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key) #ifdef _unroll #pragma unroll #endif - for (u32 i = 0; i < 64; i++) + for (u8 i = 0; i < 
64; i++) { SET_KEY32 (S, i, v); v += a; } - const u32 d0 = key[0] >> 0; - const u32 d1 = key[0] >> 8; - const u32 d2 = key[0] >> 16; - const u32 d3 = key[0] >> 24; - const u32 d4 = key[1] >> 0; + const u8 d0 = v8a_from_v32_S (key[0]); + const u8 d1 = v8b_from_v32_S (key[0]); + const u8 d2 = v8c_from_v32_S (key[0]); + const u8 d3 = v8d_from_v32_S (key[0]); + const u8 d4 = v8a_from_v32_S (key[1]); - u32 j = 0; + u8 j = 0; #ifdef _unroll #pragma unroll @@ -150,16 +150,16 @@ DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key) #ifdef _unroll #pragma unroll #endif - for (u32 i = 0; i < 64; i++) + for (u8 i = 0; i < 64; i++) { SET_KEY32 (S, i, v); v += a; } - u32 j = 0; + u8 j = 0; for (u32 i = 0; i < 16; i++) { - u32 idx = i * 16; + u8 idx = i * 16; u32 v; @@ -204,66 +204,66 @@ DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j) DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out) { - u8 src = i; - u8 dst = j; + u8 a = i; + u8 b = j; #ifdef _unroll #pragma unroll #endif - for (u32 k = 0; k < 4; k++) + for (int k = 0; k < 4; k++) { u32 xor4 = 0; - u8 idx; + u32 tmp; - u32 r; + u8 idx; - src += 1; - dst += GET_KEY8 (S, src); + a += 1; + b += GET_KEY8 (S, a); - rc4_swap (S, src, dst); + rc4_swap (S, a, b); - idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); - r = GET_KEY8 (S, idx); + tmp = GET_KEY8 (S, idx); - xor4 |= r << 0; + xor4 |= tmp << 0; - src += 1; - dst += GET_KEY8 (S, src); + a += 1; + b += GET_KEY8 (S, a); - rc4_swap (S, src, dst); + rc4_swap (S, a, b); - idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); - r = GET_KEY8 (S, idx); + tmp = GET_KEY8 (S, idx); - xor4 |= r << 8; + xor4 |= tmp << 8; - src += 1; - dst += GET_KEY8 (S, src); + a += 1; + b += GET_KEY8 (S, a); - rc4_swap (S, src, dst); + rc4_swap (S, a, b); - idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); - r = GET_KEY8 (S, idx); + 
tmp = GET_KEY8 (S, idx); - xor4 |= r << 16; + xor4 |= tmp << 16; - src += 1; - dst += GET_KEY8 (S, src); + a += 1; + b += GET_KEY8 (S, a); - rc4_swap (S, src, dst); + rc4_swap (S, a, b); - idx = GET_KEY8 (S, src) + GET_KEY8 (S, dst); + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); - r = GET_KEY8 (S, idx); + tmp = GET_KEY8 (S, idx); - xor4 |= r << 24; + xor4 |= tmp << 24; out[k] = in[k] ^ xor4; } - return dst; + return b; } diff --git a/OpenCL/inc_cipher_rc4.h b/OpenCL/inc_cipher_rc4.h index 2feb8176a..5c60a78b7 100644 --- a/OpenCL/inc_cipher_rc4.h +++ b/OpenCL/inc_cipher_rc4.h @@ -6,9 +6,9 @@ #ifndef _INC_CIPHER_RC4_H #define _INC_CIPHER_RC4_H -DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const int k); -DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const int k, const u8 v); -DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const int k, const u32 v); +DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k); +DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v); +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v); DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key); DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key); diff --git a/OpenCL/m09800_a0-optimized.cl b/OpenCL/m09800_a0-optimized.cl index 4b57144ae..e9987ee54 100644 --- a/OpenCL/m09800_a0-optimized.cl +++ b/OpenCL/m09800_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -30,129 +31,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - 
#pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = 
rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) { /** @@ -187,9 +65,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -305,11 +181,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -341,7 +217,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -385,7 +261,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -394,7 +270,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -411,7 +287,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -475,9 +351,7 @@ KERNEL_FQ void m09800_s04 
(KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -605,11 +479,11 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -641,7 +515,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -686,7 +560,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -695,7 +569,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -712,7 +586,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { diff --git a/OpenCL/m09800_a1-optimized.cl b/OpenCL/m09800_a1-optimized.cl index f37773596..59d236b07 100644 --- a/OpenCL/m09800_a1-optimized.cl +++ b/OpenCL/m09800_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include 
"inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -28,129 +29,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - 
#endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) { /** @@ -185,9 +63,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -353,11 +229,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -389,7 +265,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -433,7 +309,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -442,7 +318,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, 
secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -459,7 +335,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -523,9 +399,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -703,11 +577,11 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -739,7 +613,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -784,7 +658,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -793,7 +667,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -810,7 +684,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = 
esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { diff --git a/OpenCL/m09800_a3-optimized.cl b/OpenCL/m09800_a3-optimized.cl index caebeeb4a..9ce63b258 100644 --- a/OpenCL/m09800_a3-optimized.cl +++ b/OpenCL/m09800_a3-optimized.cl @@ -10,6 +10,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -25,130 +26,7 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + 
(v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09800m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -157,12 +35,6 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ @@ -276,11 +148,11 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = hc_swap32_S 
(out[0]); w0_t[1] = hc_swap32_S (out[1]); @@ -312,7 +184,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -356,7 +228,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -365,7 +237,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -382,7 +254,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -404,7 +276,7 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 } } -DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09800s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -413,12 +285,6 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ @@ -544,11 +410,11 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 
*w1, u32 *w2, u3 digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = hc_swap32_S (out[0]); w0_t[1] = hc_swap32_S (out[1]); @@ -580,7 +446,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -625,7 +491,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -634,7 +500,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -651,7 +517,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -715,9 +581,9 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -764,9 +630,9 @@ KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } 
KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -813,9 +679,9 @@ KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -862,9 +728,9 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, 
pws_pos, gid_max); + m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -911,9 +777,9 @@ KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -960,7 +826,7 @@ KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK 
u32 S[64 * FIXED_LOCAL_SIZE]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09810_a0-optimized.cl b/OpenCL/m09810_a0-optimized.cl index 3bea210e6..b7d8eacec 100644 --- a/OpenCL/m09810_a0-optimized.cl +++ b/OpenCL/m09810_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice34 @@ -28,129 +29,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; 
- } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ 
xor4; - } - - return j; -} - KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) { /** @@ -185,9 +63,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -224,11 +100,11 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -262,7 +138,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -310,9 +186,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -361,11 +235,11 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -399,7 +273,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09810_a1-optimized.cl b/OpenCL/m09810_a1-optimized.cl index 5766aa8f3..7bdaf201e 100644 --- 
a/OpenCL/m09810_a1-optimized.cl +++ b/OpenCL/m09810_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice34 @@ -26,129 +27,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 
24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) { /** @@ -183,9 +61,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -268,11 +144,11 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -306,7 +182,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -354,9 +230,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -451,11 
+325,11 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -489,7 +363,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09810_a3-optimized.cl b/OpenCL/m09810_a3-optimized.cl index 9287d419e..775d03158 100644 --- a/OpenCL/m09810_a3-optimized.cl +++ b/OpenCL/m09810_a3-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice34 @@ -26,130 +27,7 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); 
idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09810m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -158,12 +36,6 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); 
const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ @@ -194,11 +66,11 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); u32 w0_t[4]; u32 w1_t[4]; @@ -237,13 +109,13 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09810s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -252,12 +124,6 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ @@ -300,11 +166,11 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); u32 w0_t[4]; u32 w1_t[4]; @@ -343,7 +209,7 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } @@ -393,9 +259,9 @@ KERNEL_FQ void 
m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -442,9 +308,9 @@ KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -491,9 +357,9 @@ KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -540,9 +406,9 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -589,9 +455,9 @@ KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -638,7 +504,7 @@ KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09820_a0-optimized.cl b/OpenCL/m09820_a0-optimized.cl index 3101d2ee6..7c89d2c8c 100644 --- a/OpenCL/m09820_a0-optimized.cl +++ b/OpenCL/m09820_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -30,129 +31,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - 
-DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= 
rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) { /** @@ -187,9 +65,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -327,7 +203,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -338,7 +214,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -355,7 +231,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -419,9 +295,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -570,7 +444,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ 
-581,7 +455,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -598,7 +472,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { diff --git a/OpenCL/m09820_a1-optimized.cl b/OpenCL/m09820_a1-optimized.cl index 578d971bc..187bf54a2 100644 --- a/OpenCL/m09820_a1-optimized.cl +++ b/OpenCL/m09820_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -28,129 +29,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); 
swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) { /** @@ -185,9 +63,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -375,7 +251,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 
secondBlockData[4]; @@ -386,7 +262,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -403,7 +279,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -467,9 +343,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -668,7 +542,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -679,7 +553,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -696,7 +570,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { diff --git a/OpenCL/m09820_a3-optimized.cl b/OpenCL/m09820_a3-optimized.cl index 0b0845d18..1dccc017e 100644 --- a/OpenCL/m09820_a3-optimized.cl +++ b/OpenCL/m09820_a3-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -28,130 +29,7 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef 
struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, 
j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09820m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -160,12 +38,6 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ @@ -299,7 +171,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -310,7 +182,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -327,7 +199,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -349,7 +221,7 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 
*w2, u3 } } -DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09820s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -358,12 +230,6 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ @@ -508,7 +374,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; @@ -519,7 +385,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -536,7 +402,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -600,9 +466,9 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, 
digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -649,9 +515,9 @@ KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -698,9 +564,9 @@ KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t)) 
* main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -747,9 +613,9 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -796,9 +662,9 @@ KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -845,7 +711,7 @@ KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10500-pure.cl b/OpenCL/m10500-pure.cl index 329488d9c..4bf9a13e3 100644 --- a/OpenCL/m10500-pure.cl +++ b/OpenCL/m10500-pure.cl @@ -9,23 +9,12 @@ #include "inc_platform.cl" #include "inc_common.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif #define COMPARE_S "inc_comp_single.cl" #define COMPARE_M "inc_comp_multi.cl" -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -54,132 +43,6 @@ typedef struct pdf14_tmp } pdf14_tmp_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for 
(u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - 
idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) { /** @@ -207,13 +70,6 @@ KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) const u32 pw_len = pws[gid].pw_len; - /** - * shared - */ - - //LOCAL_AS RC4_KEY rc4_keys[64]; - //LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * U_buf */ @@ -283,6 +139,18 @@ KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) // max length supported by pdf11 is 32 + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + w0_t[0] = padding[0]; w0_t[1] = padding[1]; w0_t[2] = padding[2]; @@ -377,9 +245,7 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -448,9 +314,9 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) tmp[2] = digest[2] ^ xv; tmp[3] = digest[3] ^ xv; - rc4_init_16 (rc4_key, tmp); + rc4_init_128 (S, tmp); - rc4_next_16 (rc4_key, 0, 0, out, out); + rc4_next_16 (S, 0, 0, out, out); } } diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl index 78ec57ad9..b8bf46c0e 100644 --- a/OpenCL/m13100_a0-optimized.cl +++ b/OpenCL/m13100_a0-optimized.cl @@ -16,6 +16,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -27,129 +28,6 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, 
const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - 
idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -235,9 +113,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -256,15 +134,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -302,10 +180,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 
+= 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -329,7 +207,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -342,8 +220,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -356,9 +234,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -371,10 +249,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; 
edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -610,9 +488,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -655,7 +531,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -708,9 +584,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -753,7 +627,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl index 85fb0f79c..83b519aef 100644 --- a/OpenCL/m13100_a0-pure.cl +++ b/OpenCL/m13100_a0-pure.cl @@ -15,6 +15,7 @@ #include "inc_rp.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -26,132 +27,9 @@ typedef 
struct krb5tgs } krb5tgs_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, 
GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -170,15 +48,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -215,10 +93,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 
+= 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -242,31 +120,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, 
i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -404,9 +282,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -439,7 +315,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -466,9 +342,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -501,7 +375,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl index d9339b1df..64ec8c79a 100644 --- a/OpenCL/m13100_a1-optimized.cl +++ b/OpenCL/m13100_a1-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -25,129 +26,6 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 
i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += 
rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -254,15 +132,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 
(rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = 
rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -607,9 +485,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -702,7 +578,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -754,9 +630,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -849,7 +723,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl index 1156f04a3..7f9fe5d4e 100644 --- a/OpenCL/m13100_a1-pure.cl +++ b/OpenCL/m13100_a1-pure.cl @@ -13,6 +13,7 @@ 
#include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -24,132 +25,9 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap 
(rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -168,15 +46,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, 
edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i 
+= 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -400,9 +278,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -435,7 +311,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -460,9 +336,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -495,7 +369,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl index 61617aa61..45c1afeda 100644 --- a/OpenCL/m13100_a3-optimized.cl +++ b/OpenCL/m13100_a3-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -25,129 +26,6 @@ typedef struct krb5tgs } krb5tgs_t; 
-typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - 
swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -254,15 +132,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left 
>= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY 
*rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -573,7 +451,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5tgs_t)) +DECLSPEC void m13100 (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5tgs_t)) { /** * modifier @@ -622,7 +500,7 @@ DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -677,11 +555,9 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -729,11 +605,9 @@ KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, 
digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_m16 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -785,11 +659,9 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -837,11 +709,9 @@ KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_s16 (KERN_ATTR_ESALT (krb5tgs_t)) diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl index 0ef44c00d..7e697aeaa 100644 --- a/OpenCL/m13100_a3-pure.cl +++ b/OpenCL/m13100_a3-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -24,132 +25,9 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap 
(rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 
(rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -168,15 +46,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16 (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 
+= 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -409,9 +287,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -448,7 +324,7 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -482,9 +358,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) w[idx] = pws[gid].i[idx]; } - 
LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -521,7 +395,7 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m18200_a0-optimized.cl b/OpenCL/m18200_a0-optimized.cl index 3374f8c23..3727bd102 100644 --- a/OpenCL/m18200_a0-optimized.cl +++ b/OpenCL/m18200_a0-optimized.cl @@ -16,6 +16,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -27,129 +28,6 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); 
swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -235,9 +113,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check 
(LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -255,14 +133,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16 (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 
(rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -608,9 +486,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -653,7 +529,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check 
(S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -706,9 +582,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -751,7 +625,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m18200_a0-pure.cl b/OpenCL/m18200_a0-pure.cl index 71f19cbb3..92b4019c7 100644 --- a/OpenCL/m18200_a0-pure.cl +++ b/OpenCL/m18200_a0-pure.cl @@ -15,6 +15,7 @@ #include "inc_rp.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -26,132 +27,9 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += 
rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, 
GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -169,14 +47,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16 (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = 
rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -402,9 +280,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -437,7 +313,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -464,9 +340,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY 
rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -499,7 +373,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m18200_a1-optimized.cl b/OpenCL/m18200_a1-optimized.cl index 2d77d1e53..122e232bd 100644 --- a/OpenCL/m18200_a1-optimized.cl +++ b/OpenCL/m18200_a1-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -25,129 +26,6 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, 
idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 
*data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -253,14 +131,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16 (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -298,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -325,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -338,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, 
w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -352,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -367,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -605,9 +483,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -700,7 +576,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, 
esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -752,9 +628,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -847,7 +721,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m18200_a1-pure.cl b/OpenCL/m18200_a1-pure.cl index e5df3c2d1..3dbe70247 100644 --- a/OpenCL/m18200_a1-pure.cl +++ b/OpenCL/m18200_a1-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -24,132 +25,9 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); 
swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, 
const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -167,14 +45,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16 (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -211,10 +89,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -238,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, 
w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -398,9 +276,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -433,7 +309,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -458,9 +334,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 
S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -493,7 +367,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m18200_a3-optimized.cl b/OpenCL/m18200_a3-optimized.cl index 1a32c19de..659aee575 100644 --- a/OpenCL/m18200_a3-optimized.cl +++ b/OpenCL/m18200_a3-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -25,129 +26,6 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j 
+= rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 
*checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -253,14 +131,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16 (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -298,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -325,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -338,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 
+= 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -352,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -367,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -571,7 +449,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5asrep_t)) +DECLSPEC void m18200 (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5asrep_t)) { /** * modifier @@ -620,7 +498,7 @@ DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, 
esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -675,11 +553,9 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -727,11 +603,9 @@ KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_m16 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -783,11 +657,9 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -835,11 +707,9 @@ KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_s16 (KERN_ATTR_ESALT (krb5asrep_t)) diff --git a/OpenCL/m18200_a3-pure.cl b/OpenCL/m18200_a3-pure.cl index 68ddd2c18..a6dbb720a 100644 --- a/OpenCL/m18200_a3-pure.cl +++ b/OpenCL/m18200_a3-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -24,132 +25,9 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, 
GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) 
- { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -167,14 +45,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16 (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -211,10 +89,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, 
edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -238,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16 (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -407,9 +285,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY 
rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -446,7 +322,7 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { @@ -480,9 +356,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; @@ -519,7 +393,7 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { diff --git a/OpenCL/m25400-pure.cl b/OpenCL/m25400-pure.cl index 0a0aba524..d6c15f17a 100644 --- a/OpenCL/m25400-pure.cl +++ b/OpenCL/m25400-pure.cl @@ -12,23 +12,12 @@ #include "inc_platform.cl" #include "inc_common.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif #define COMPARE_S "inc_comp_single.cl" #define COMPARE_M "inc_comp_multi.cl" -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -57,132 +46,6 @@ typedef struct pdf14_tmp } pdf14_tmp_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY 
*rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= 
rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m25400_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) { /** @@ -210,13 +73,22 @@ KERNEL_FQ void m25400_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) const u32 pw_len = pws[gid].pw_len; + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - //LOCAL_AS RC4_KEY rc4_keys[64]; - //LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[12]; @@ -327,9 +199,7 @@ KERNEL_FQ void m25400_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -398,9 +268,9 @@ KERNEL_FQ void m25400_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) tmp[2] = digest[2] ^ xv; tmp[3] = digest[3] ^ xv; - rc4_init_16 (rc4_key, tmp); + rc4_init_128 (S, tmp); - rc4_next_16 (rc4_key, 0, 0, out, out); + rc4_next_16 (S, 0, 0, out, out); } } diff --git a/docs/changes.txt b/docs/changes.txt index f07ecf45f..c73cfd07b 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -30,7 +30,7 @@ - AES Crypt Plugin: Reduced max password length from 256 to 128 which improved performance by 22% - RAR3-p (Compressed): Fix workaround in unrar library in AES constant table generation to enable multi-threading support - CRC32: Prevent decompression of data used in CRC32 calculation on host. 
This leads to false negatives with TrueCrypt/VeraCrypt keyfiles -- RC4: Updated hash-mode 7500, 9710, 9720, 10400 and 10410 to new RC4 crypto library code, improving performance by 20% or more +- RC4 Kernels: Improved performance by 20%+ for hash-modes Kerberos 5 (etype 23), MS Office (<= 2003) and PDF (<= 1.6) by using new RC4 code ## ## Technical @@ -40,7 +40,7 @@ - Dependencies: Updated xxHash from 0.1.0 to v0.8.0 - Stable XXH3 - Documentation: Update missing documentation in plugin developer guide for OPTS_TYPE_MP_MULTI_DISABLE and OPTS_TYPE_NATIVE_THREADS - Kernels: Add standalone true UTF8 to UTF16 converter kernel that runs after amplifier. Use OPTS_TYPE_POST_AMP_UTF16LE from plugin -- Kernels: Add RC4 cipher to crypto library with shared memory access pattern which is not causing any bank conflicts +- Kernels: Add RC4 cipher to crypto library with optimized shared memory access pattern which will not cause any bank conflicts if -u <= 32 - Modules: Recategorized HASH_CATEGORY option in various modules * changes v6.2.0 -> v6.2.1 diff --git a/src/modules/module_09800.c b/src/modules/module_09800.c index 18244b801..4508fcd5b 100644 --- a/src/modules/module_09800.c +++ b/src/modules/module_09800.c @@ -59,25 +59,42 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$"; static const char *SIGNATURE_OLDOFFICE3 = "$oldoffice$3"; static const char *SIGNATURE_OLDOFFICE4 = "$oldoffice$4"; -u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { - const u64 esalt_size = (const u64) sizeof (oldoffice34_t); + char *jit_build_options = NULL; - return esalt_size; -} + u32 
native_threads = 1; /* fallback for device types that are neither CPU nor GPU: leaving 0 here would emit FIXED_LOCAL_SIZE=0 while the kernels declare S[64 * FIXED_LOCAL_SIZE] */ -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; // RC4 + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else + { + native_threads = 32; + } + } - return kernel_threads_min; + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + + return jit_build_options; } -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 64; // RC4 + const u64 esalt_size = (const u64) sizeof (oldoffice34_t); - return kernel_threads_max; + return esalt_size; } u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -314,14 +331,14 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; 
module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_09810.c b/src/modules/module_09810.c index d92ded08c..e1a434cf3 100644 --- a/src/modules/module_09810.c +++ b/src/modules/module_09810.c @@ -58,25 +58,42 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$"; static const char *SIGNATURE_OLDOFFICE3 = "$oldoffice$3"; static const char *SIGNATURE_OLDOFFICE4 = "$oldoffice$4"; -u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { - const u64 esalt_size = (const u64) sizeof (oldoffice34_t); + char *jit_build_options = NULL; - return esalt_size; -} + u32 native_threads = 0; -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; // RC4 + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if 
(device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else + { + native_threads = 32; + } + } - return kernel_threads_min; + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + + return jit_build_options; } -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { - const u32 kernel_threads_max = 64; // RC4 + const u64 esalt_size = (const u64) sizeof (oldoffice34_t); - return kernel_threads_max; + return esalt_size; } u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -332,14 +349,14 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff 
--git a/src/modules/module_09820.c b/src/modules/module_09820.c index d520b2889..f3f7ddee2 100644 --- a/src/modules/module_09820.c +++ b/src/modules/module_09820.c @@ -60,6 +60,37 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$"; static const char *SIGNATURE_OLDOFFICE3 = "$oldoffice$3"; //static const char *SIGNATURE_OLDOFFICE4 = "$oldoffice$4"; +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + char *jit_build_options = NULL; + + u32 native_threads = 0; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else + { + native_threads = 32; + } + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + + return jit_build_options; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (oldoffice34_t); @@ -340,7 +371,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; diff --git a/src/modules/module_10500.c 
b/src/modules/module_10500.c index 922759151..fbb1af6a3 100644 --- a/src/modules/module_10500.c +++ b/src/modules/module_10500.c @@ -92,24 +92,30 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { char *jit_build_options = NULL; - // Extra treatment for Apple systems - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - return jit_build_options; - } + u32 native_threads = 0; - // Intel CPU - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)) + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - hc_asprintf (&jit_build_options, "-D _unroll"); + native_threads = 1; } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - hc_asprintf (&jit_build_options, "-D _unroll"); + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else + { + native_threads = 32; + } } + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + return jit_build_options; } @@ -127,20 +133,6 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c return tmp_size; } -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; // RC4 - - return kernel_threads_min; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = 64; // RC4 - - return kernel_threads_max; -} - u32 module_pw_max (MAYBE_UNUSED const 
hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 pw_max = 32; // https://www.pdflib.com/knowledge-base/pdf-password-security/encryption/ @@ -505,8 +497,8 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_13100.c b/src/modules/module_13100.c index 3815b614e..bab0dbf26 100644 --- a/src/modules/module_13100.c +++ b/src/modules/module_13100.c @@ -52,48 +52,51 @@ typedef struct krb5tgs static const char *SIGNATURE_KRB5TGS = "$krb5tgs$23$"; -u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 esalt_size = (const u64) sizeof (krb5tgs_t); - - return esalt_size; -} - -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; - - return kernel_threads_min; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = 64; - - return kernel_threads_max; -} - char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t 
*hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { char *jit_build_options = NULL; - // in pure -a 0 mode we reserve pws_t with 64 threads = 256 + 4 bytes = 16640. - // the RC4_KEY with 64 threads requires (256 + 4) 16640. + u32 native_threads = 0; - if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0) + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { if (device_param->device_local_mem_size < 49152) { - hc_asprintf (&jit_build_options, "-D FORCE_DISABLE_SHM"); + native_threads = 32; + } + else + { + native_threads = 64; } } + else + { + native_threads = 32; + } } + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + return jit_build_options; } +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (krb5tgs_t); + + return esalt_size; +} + bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) { // amdgpu-pro-20.50-1234664-ubuntu-20.04 (legacy) @@ -308,14 +311,14 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - 
module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_18200.c b/src/modules/module_18200.c index 1dbb470c4..b95ddab6f 100644 --- a/src/modules/module_18200.c +++ b/src/modules/module_18200.c @@ -56,20 +56,37 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { char *jit_build_options = NULL; - // in pure -a 0 mode we reserve pws_t with 64 threads = 256 + 4 bytes = 16640. - // the RC4_KEY with 64 threads requires (256 + 4) 16640. 
+ u32 native_threads = 0; - if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0) + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { if (device_param->device_local_mem_size < 49152) { - hc_asprintf (&jit_build_options, "-D FORCE_DISABLE_SHM"); + native_threads = 32; } + else + { + native_threads = 64; + } + } + else + { + native_threads = 32; } } + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + return jit_build_options; } @@ -80,20 +97,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED return esalt_size; } -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; - - return kernel_threads_min; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = 64; - - return kernel_threads_max; -} - bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) { // amdgpu-pro-20.50-1234664-ubuntu-20.04 (legacy) @@ -276,8 +279,8 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - 
module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_25400.c b/src/modules/module_25400.c index f5af76dde..5dbbe8dc3 100644 --- a/src/modules/module_25400.c +++ b/src/modules/module_25400.c @@ -95,24 +95,37 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { char *jit_build_options = NULL; - // Extra treatment for Apple systems - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - return jit_build_options; - } + u32 native_threads = 0; - // Intel CPU - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)) + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - hc_asprintf (&jit_build_options, "-D _unroll"); + native_threads = 1; } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - hc_asprintf (&jit_build_options, "-D _unroll"); + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + if (device_param->device_local_mem_size < 49152) + { + native_threads = 32; + } + else + { + native_threads = 64; + } + } + else + { + native_threads = 32; + } } + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + return jit_build_options; } @@ -130,20 +143,6 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c return tmp_size; } -u32 module_kernel_threads_min 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; // RC4 - - return kernel_threads_min; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = 64; // RC4 - - return kernel_threads_max; -} - u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 pw_max = 32; // https://www.pdflib.com/knowledge-base/pdf-password-security/encryption/ @@ -508,8 +507,8 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type;