From 3192f55707dea406382bbdd59d35b2c585185bf8 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sun, 30 May 2021 12:36:17 +0200 Subject: [PATCH] PDF: Update -m 10410 to new RC4 crypto library code, improving performance by 22% - 32%. --- OpenCL/m10410_a0-optimized.cl | 135 +++------------------------ OpenCL/m10410_a1-optimized.cl | 137 +++------------------------ OpenCL/m10410_a3-optimized.cl | 169 +++++----------------------------- docs/changes.txt | 1 + src/modules/module_10410.c | 58 +++++------- 5 files changed, 74 insertions(+), 426 deletions(-) diff --git a/OpenCL/m10410_a0-optimized.cl b/OpenCL/m10410_a0-optimized.cl index 05a449314..9deb9faaf 100644 --- a/OpenCL/m10410_a0-optimized.cl +++ b/OpenCL/m10410_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif CONSTANT_VK u32a padding[8] = @@ -50,115 +51,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) { /** * modifier @@ -192,8 +85,7 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -212,25 +104,25 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_16 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_m08 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m08 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_m16 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m16 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) { /** * modifier @@ -264,8 +156,7 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * digest @@ -296,20 +187,20 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_16 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_s08 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s08 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_s16 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s16 (KERN_ATTR_RULES_ESALT (pdf_t)) { } diff --git a/OpenCL/m10410_a1-optimized.cl b/OpenCL/m10410_a1-optimized.cl index 083365c96..16836b2c7 100644 --- a/OpenCL/m10410_a1-optimized.cl +++ b/OpenCL/m10410_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif CONSTANT_VK u32a padding[8] = @@ -48,115 +49,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -190,9 +83,7 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -280,25 +171,25 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_16 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m08 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m16 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -332,9 +223,7 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -434,20 +323,20 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_16 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s08 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s16 (KERN_ATTR_ESALT (pdf_t)) { } diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl index 5c77f6db6..fa1cb6272 100644 --- a/OpenCL/m10410_a3-optimized.cl +++ b/OpenCL/m10410_a3-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif CONSTANT_VK u32a padding[8] = @@ -48,115 +49,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m10410m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) +DECLSPEC void m10410m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -165,12 +58,6 @@ DECLSPEC void m10410m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * loop */ @@ -189,17 +76,17 @@ DECLSPEC void m10410m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_16 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC void m10410s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) +DECLSPEC void m10410s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -208,12 +95,6 @@ DECLSPEC void m10410s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * digest */ @@ -244,17 +125,17 @@ DECLSPEC void m10410s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_16 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -298,12 +179,12 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m10410m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m08 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -347,12 +228,12 @@ KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m10410m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m16 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -396,12 +277,12 @@ KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m10410m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -445,12 +326,12 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m10410s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s08 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -494,12 +375,12 @@ KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m10410s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s16 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -543,7 +424,7 @@ KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m10410s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/docs/changes.txt b/docs/changes.txt index 88dd1eced..a33d5699f 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -30,6 +30,7 @@ - AES Crypt Plugin: Reduced max password length from 256 to 128 which improved performance by 22% - RAR3-p (Compressed): Fix workaround in unrar library in AES constant table generation to enable multi-threading support - CRC32: Prevent decompression of data used in CRC32 calculation on host. This leads to false negatives with TrueCrypt/VeraCrypt keyfiles +- PDF: Update -m 10410 to new RC4 crypto library code, improving performance by 22% - 32%. ## ## Technical diff --git a/src/modules/module_10410.c b/src/modules/module_10410.c index a96643884..df1d40d2d 100644 --- a/src/modules/module_10410.c +++ b/src/modules/module_10410.c @@ -69,29 +69,29 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { char *jit_build_options = NULL; - // Extra treatment for Apple systems - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + u32 native_threads = 0; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - return jit_build_options; + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else + { + native_threads = 32; + } } - // Intel CPU - if ((device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) && (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - // NVIDIA GPU - if (device_param->opencl_device_vendor_id == VENDOR_ID_NV) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); return jit_build_options; } @@ -103,20 +103,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED return esalt_size; } -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 64; // RC4 - - return kernel_threads_min; -} - -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = 64; // RC4 - - return kernel_threads_max; -} - u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u32 pw_min = 5; // RC4-40 fixed @@ -405,8 +391,8 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type;