diff --git a/OpenCL/m28000_a0-optimized.cl b/OpenCL/m28000_a0-optimized.cl index 044118a69..ceed33ac3 100644 --- a/OpenCL/m28000_a0-optimized.cl +++ b/OpenCL/m28000_a0-optimized.cl @@ -154,42 +154,42 @@ CONSTANT_VK u64a crc64jonestab[0x100] = 0x536fa08fdfd90e51, 0x29b7d047efec8728, }; -DECLSPEC u64 round_crc64jones (u64 a, const u64 v) +DECLSPEC u64 round_crc64jones (u64 a, const u64 v, SHM_TYPE u64 *s_crc64jonestab) { const u64 k = (a ^ v) & 0xff; const u64 s = a >> 8; - a = crc64jonestab[k]; + a = s_crc64jonestab[k]; a ^= s; return a; } -DECLSPEC u64 crc64jones (const u32 *w, const u32 pw_len, const u64 iv) +DECLSPEC u64 crc64jones (const u32 *w, const u32 pw_len, const u64 iv, SHM_TYPE u64 *s_crc64jonestab) { u64 a = iv; - if (pw_len >= 1) a = round_crc64jones (a, w[0] >> 0); - if (pw_len >= 2) a = round_crc64jones (a, w[0] >> 8); - if (pw_len >= 3) a = round_crc64jones (a, w[0] >> 16); - if (pw_len >= 4) a = round_crc64jones (a, w[0] >> 24); - if (pw_len >= 5) a = round_crc64jones (a, w[1] >> 0); - if (pw_len >= 6) a = round_crc64jones (a, w[1] >> 8); - if (pw_len >= 7) a = round_crc64jones (a, w[1] >> 16); - if (pw_len >= 8) a = round_crc64jones (a, w[1] >> 24); - if (pw_len >= 9) a = round_crc64jones (a, w[2] >> 0); - if (pw_len >= 10) a = round_crc64jones (a, w[2] >> 8); - if (pw_len >= 11) a = round_crc64jones (a, w[2] >> 16); - if (pw_len >= 12) a = round_crc64jones (a, w[2] >> 24); + if (pw_len >= 1) a = round_crc64jones (a, w[0] >> 0, s_crc64jonestab); + if (pw_len >= 2) a = round_crc64jones (a, w[0] >> 8, s_crc64jonestab); + if (pw_len >= 3) a = round_crc64jones (a, w[0] >> 16, s_crc64jonestab); + if (pw_len >= 4) a = round_crc64jones (a, w[0] >> 24, s_crc64jonestab); + if (pw_len >= 5) a = round_crc64jones (a, w[1] >> 0, s_crc64jonestab); + if (pw_len >= 6) a = round_crc64jones (a, w[1] >> 8, s_crc64jonestab); + if (pw_len >= 7) a = round_crc64jones (a, w[1] >> 16, s_crc64jonestab); + if (pw_len >= 8) a = round_crc64jones (a, w[1] >> 24, s_crc64jonestab); + if (pw_len >= 9) a = round_crc64jones (a, w[2] >> 0, s_crc64jonestab); + if (pw_len >= 10) a = round_crc64jones (a, w[2] >> 8, s_crc64jonestab); + if (pw_len >= 11) a = round_crc64jones (a, w[2] >> 16, s_crc64jonestab); + if (pw_len >= 12) a = round_crc64jones (a, w[2] >> 24, s_crc64jonestab); for (u32 i = 12, j = 3; i < pw_len; i += 4, j += 1) { - if (pw_len >= (i + 1)) a = round_crc64jones (a, w[j] >> 0); - if (pw_len >= (i + 2)) a = round_crc64jones (a, w[j] >> 8); - if (pw_len >= (i + 3)) a = round_crc64jones (a, w[j] >> 16); - if (pw_len >= (i + 4)) a = round_crc64jones (a, w[j] >> 24); + if (pw_len >= (i + 1)) a = round_crc64jones (a, w[j] >> 0, s_crc64jonestab); + if (pw_len >= (i + 2)) a = round_crc64jones (a, w[j] >> 8, s_crc64jonestab); + if (pw_len >= (i + 3)) a = round_crc64jones (a, w[j] >> 16, s_crc64jonestab); + if (pw_len >= (i + 4)) a = round_crc64jones (a, w[j] >> 24, s_crc64jonestab); } return a; @@ -201,16 +201,37 @@ KERNEL_FQ void m28000_m04 (KERN_ATTR_RULES_ESALT (crc64_t)) * modifier */ + const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); /** - * base + * CRC64Jones shared */ - const u64 gid = get_global_id (0); + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -267,7 +288,7 @@ KERNEL_FQ void m28000_m04 (KERN_ATTR_RULES_ESALT (crc64_t)) w[14] = 0; w[15] = 0; - u64 a = crc64jones (w, pw_len, iv); + u64 a = crc64jones (w, pw_len, iv, s_crc64jonestab); const u32 r0 = l32_from_64 (a); const u32 r1 = h32_from_64 (a); @@ -292,16 +313,37 @@ KERNEL_FQ void m28000_s04 (KERN_ATTR_RULES_ESALT (crc64_t)) * modifier */ + const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); /** - * base + * CRC64Jones shared */ - const u64 gid = get_global_id (0); + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -370,7 +412,7 @@ KERNEL_FQ void m28000_s04 (KERN_ATTR_RULES_ESALT (crc64_t)) w[14] = 0; w[15] = 0; - u64 a = crc64jones (w, pw_len, iv); + u64 a = crc64jones (w, pw_len, iv, s_crc64jonestab); const u32 r0 = l32_from_64 (a); const u32 r1 = h32_from_64 (a); diff --git a/OpenCL/m28000_a1-optimized.cl b/OpenCL/m28000_a1-optimized.cl index 92f5979e5..4de6f092f 100644 --- a/OpenCL/m28000_a1-optimized.cl +++ b/OpenCL/m28000_a1-optimized.cl @@ -152,42 +152,42 @@ CONSTANT_VK u64a crc64jonestab[0x100] = 0x536fa08fdfd90e51, 0x29b7d047efec8728, }; -DECLSPEC u64 round_crc64jones (u64 a, const u64 v) +DECLSPEC u64 round_crc64jones (u64 a, const u64 v, SHM_TYPE u64 *s_crc64jonestab) { const u64 k = (a ^ v) & 0xff; const u64 s = a >> 8; - a = crc64jonestab[k]; + a = s_crc64jonestab[k]; a ^= s; return a; } -DECLSPEC u64 crc64jones (const u32 *w, const u32 pw_len, const u64 iv) +DECLSPEC u64 crc64jones (const u32 *w, const u32 pw_len, const u64 iv, SHM_TYPE u64 *s_crc64jonestab) { u64 a = iv; - if (pw_len >= 1) a = round_crc64jones (a, w[0] >> 0); - if (pw_len >= 2) a = round_crc64jones (a, w[0] >> 8); - if (pw_len >= 3) a = round_crc64jones (a, w[0] >> 16); - if (pw_len >= 4) a = round_crc64jones (a, w[0] >> 24); - if (pw_len >= 5) a = round_crc64jones (a, w[1] >> 0); - if (pw_len >= 6) a = round_crc64jones (a, w[1] >> 8); - if (pw_len >= 7) a = round_crc64jones (a, w[1] >> 16); - if (pw_len >= 8) a = round_crc64jones (a, w[1] >> 24); - if (pw_len >= 9) a = round_crc64jones (a, w[2] >> 0); - if (pw_len >= 10) a = round_crc64jones (a, w[2] >> 8); - if (pw_len >= 11) a = round_crc64jones (a, w[2] >> 16); - if (pw_len >= 12) a = round_crc64jones (a, w[2] >> 24); + if (pw_len >= 1) a = round_crc64jones (a, w[0] >> 0, s_crc64jonestab); + if (pw_len >= 2) a = round_crc64jones (a, w[0] >> 8, s_crc64jonestab); + if (pw_len >= 3) a = round_crc64jones (a, w[0] >> 16, s_crc64jonestab); + if (pw_len >= 4) a = round_crc64jones (a, w[0] >> 24, s_crc64jonestab); + if (pw_len >= 5) a = round_crc64jones (a, w[1] >> 0, s_crc64jonestab); + if (pw_len >= 6) a = round_crc64jones (a, w[1] >> 8, s_crc64jonestab); + if (pw_len >= 7) a = round_crc64jones (a, w[1] >> 16, s_crc64jonestab); + if (pw_len >= 8) a = round_crc64jones (a, w[1] >> 24, s_crc64jonestab); + if (pw_len >= 9) a = round_crc64jones (a, w[2] >> 0, s_crc64jonestab); + if (pw_len >= 10) a = round_crc64jones (a, w[2] >> 8, s_crc64jonestab); + if (pw_len >= 11) a = round_crc64jones (a, w[2] >> 16, s_crc64jonestab); + if (pw_len >= 12) a = round_crc64jones (a, w[2] >> 24, s_crc64jonestab); for (u32 i = 12, j = 3; i < pw_len; i += 4, j += 1) { - if (pw_len >= (i + 1)) a = round_crc64jones (a, w[j] >> 0); - if (pw_len >= (i + 2)) a = round_crc64jones (a, w[j] >> 8); - if (pw_len >= (i + 3)) a = round_crc64jones (a, w[j] >> 16); - if (pw_len >= (i + 4)) a = round_crc64jones (a, w[j] >> 24); + if (pw_len >= (i + 1)) a = round_crc64jones (a, w[j] >> 0, s_crc64jonestab); + if (pw_len >= (i + 2)) a = round_crc64jones (a, w[j] >> 8, s_crc64jonestab); + if (pw_len >= (i + 3)) a = round_crc64jones (a, w[j] >> 16, s_crc64jonestab); + if (pw_len >= (i + 4)) a = round_crc64jones (a, w[j] >> 24, s_crc64jonestab); } return a; @@ -199,16 +199,37 @@ KERNEL_FQ void m28000_m04 (KERN_ATTR_ESALT (crc64_t)) * modifier */ + const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); /** - * base + * CRC64Jones shared */ - const u64 gid = get_global_id (0); + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -325,7 +346,7 @@ KERNEL_FQ void m28000_m04 (KERN_ATTR_ESALT (crc64_t)) w[14] = w3[2]; w[15] = w3[3]; - u64 a = crc64jones (w, pw_len, iv); + u64 a = crc64jones (w, pw_len, iv, s_crc64jonestab); const u32 r0 = l32_from_64 (a); const u32 r1 = h32_from_64 (a); @@ -350,16 +371,37 @@ KERNEL_FQ void m28000_s04 (KERN_ATTR_ESALT (crc64_t)) * modifier */ + const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); /** - * base + * CRC64Jones shared */ - const u64 gid = get_global_id (0); + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 pw_buf0[4]; u32 pw_buf1[4]; @@ -488,7 +530,7 @@ KERNEL_FQ void m28000_s04 (KERN_ATTR_ESALT (crc64_t)) w[14] = w3[2]; w[15] = w3[3]; - u64 a = crc64jones (w, pw_len, iv); + u64 a = crc64jones (w, pw_len, iv, s_crc64jonestab); const u32 r0 = l32_from_64 (a); const u32 r1 = h32_from_64 (a); diff --git a/OpenCL/m28000_a3-optimized.cl b/OpenCL/m28000_a3-optimized.cl index a2a218f8c..45cfe89ad 100644 --- a/OpenCL/m28000_a3-optimized.cl +++ b/OpenCL/m28000_a3-optimized.cl @@ -152,48 +152,48 @@ CONSTANT_VK u64a crc64jonestab[0x100] = 0x536fa08fdfd90e51, 0x29b7d047efec8728, }; -DECLSPEC u64 round_crc64jones (u64 a, const u64 v) +DECLSPEC u64 round_crc64jones (u64 a, const u64 v, SHM_TYPE u64 *s_crc64jonestab) { const u64 k = (a ^ v) & 0xff; const u64 s = a >> 8; - a = crc64jonestab[k]; + a = s_crc64jonestab[k]; a ^= s; return a; } -DECLSPEC u64 crc64jones (const u32 *w, const u32 pw_len, const u64 iv) +DECLSPEC u64 crc64jones (const u32 *w, const u32 pw_len, const u64 iv, SHM_TYPE u64 *s_crc64jonestab) { u64 a = iv; - if (pw_len >= 1) a = round_crc64jones (a, w[0] >> 0); - if (pw_len >= 2) a = round_crc64jones (a, w[0] >> 8); - if (pw_len >= 3) a = round_crc64jones (a, w[0] >> 16); - if (pw_len >= 4) a = round_crc64jones (a, w[0] >> 24); - if (pw_len >= 5) a = round_crc64jones (a, w[1] >> 0); - if (pw_len >= 6) a = round_crc64jones (a, w[1] >> 8); - if (pw_len >= 7) a = round_crc64jones (a, w[1] >> 16); - if (pw_len >= 8) a = round_crc64jones (a, w[1] >> 24); - if (pw_len >= 9) a = round_crc64jones (a, w[2] >> 0); - if (pw_len >= 10) a = round_crc64jones (a, w[2] >> 8); - if (pw_len >= 11) a = round_crc64jones (a, w[2] >> 16); - if (pw_len >= 12) a = round_crc64jones (a, w[2] >> 24); + if (pw_len >= 1) a = round_crc64jones (a, w[0] >> 0, s_crc64jonestab); + if (pw_len >= 2) a = round_crc64jones (a, w[0] >> 8, s_crc64jonestab); + if (pw_len >= 3) a = round_crc64jones (a, w[0] >> 16, s_crc64jonestab); + if (pw_len >= 4) a = round_crc64jones (a, w[0] >> 24, s_crc64jonestab); + if (pw_len >= 5) a = round_crc64jones (a, w[1] >> 0, s_crc64jonestab); + if (pw_len >= 6) a = round_crc64jones (a, w[1] >> 8, s_crc64jonestab); + if (pw_len >= 7) a = round_crc64jones (a, w[1] >> 16, s_crc64jonestab); + if (pw_len >= 8) a = round_crc64jones (a, w[1] >> 24, s_crc64jonestab); + if (pw_len >= 9) a = round_crc64jones (a, w[2] >> 0, s_crc64jonestab); + if (pw_len >= 10) a = round_crc64jones (a, w[2] >> 8, s_crc64jonestab); + if (pw_len >= 11) a = round_crc64jones (a, w[2] >> 16, s_crc64jonestab); + if (pw_len >= 12) a = round_crc64jones (a, w[2] >> 24, s_crc64jonestab); for (u32 i = 12, j = 3; i < pw_len; i += 4, j += 1) { - if (pw_len >= (i + 1)) a = round_crc64jones (a, w[j] >> 0); - if (pw_len >= (i + 2)) a = round_crc64jones (a, w[j] >> 8); - if (pw_len >= (i + 3)) a = round_crc64jones (a, w[j] >> 16); - if (pw_len >= (i + 4)) a = round_crc64jones (a, w[j] >> 24); + if (pw_len >= (i + 1)) a = round_crc64jones (a, w[j] >> 0, s_crc64jonestab); + if (pw_len >= (i + 2)) a = round_crc64jones (a, w[j] >> 8, s_crc64jonestab); + if (pw_len >= (i + 3)) a = round_crc64jones (a, w[j] >> 16, s_crc64jonestab); + if (pw_len >= (i + 4)) a = round_crc64jones (a, w[j] >> 24, s_crc64jonestab); } return a; } -DECLSPEC void m28000m (u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) +DECLSPEC void m28000m (SHM_TYPE u64 *s_crc64jonestab, u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) { /** * modifier @@ -243,7 +243,7 @@ DECLSPEC void m28000m (u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) w_t[14] = w[14]; w_t[15] = w[15]; - u64 a = crc64jones (w_t, pw_len, iv); + u64 a = crc64jones (w_t, pw_len, iv, s_crc64jonestab); const u32 r0 = l32_from_64 (a); const u32 r1 = h32_from_64 (a); @@ -254,7 +254,7 @@ DECLSPEC void m28000m (u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) } } -DECLSPEC void m28000s (u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) +DECLSPEC void m28000s (SHM_TYPE u64 *s_crc64jonestab, u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) { /** * modifier @@ -316,7 +316,7 @@ DECLSPEC void m28000s (u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) w_t[14] = w[14]; w_t[15] = w[15]; - u64 a = crc64jones (w_t, pw_len, iv); + u64 a = crc64jones (w_t, pw_len, iv, s_crc64jonestab); const u32 r0 = l32_from_64 (a); const u32 r1 = h32_from_64 (a); @@ -330,13 +330,40 @@ DECLSPEC void m28000s (u32 *w, const u32 pw_len, KERN_ATTR_ESALT (crc64_t)) KERNEL_FQ void m28000_m04 (KERN_ATTR_ESALT (crc64_t)) { /** - * base + * modifier */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * CRC64Jones shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 w[16]; w[ 0] = pws[gid].i[ 0]; @@ -362,19 +389,46 @@ KERNEL_FQ void m28000_m04 (KERN_ATTR_ESALT (crc64_t)) * main */ - m28000m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m28000m (s_crc64jonestab, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m28000_m08 (KERN_ATTR_ESALT (crc64_t)) { /** - * base + * modifier */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * CRC64Jones shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 w[16]; w[ 0] = pws[gid].i[ 0]; @@ -400,19 +454,46 @@ KERNEL_FQ void m28000_m08 (KERN_ATTR_ESALT (crc64_t)) * main */ - m28000m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m28000m (s_crc64jonestab, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m28000_m16 (KERN_ATTR_ESALT (crc64_t)) { /** - * base + * modifier */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * CRC64Jones shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 w[16]; w[ 0] = pws[gid].i[ 0]; @@ -438,19 +519,46 @@ KERNEL_FQ void m28000_m16 (KERN_ATTR_ESALT (crc64_t)) * main */ - m28000m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m28000m (s_crc64jonestab, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m28000_s04 (KERN_ATTR_ESALT (crc64_t)) { /** - * base + * modifier */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * CRC64Jones shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 w[16]; w[ 0] = pws[gid].i[ 0]; @@ -476,19 +584,46 @@ KERNEL_FQ void m28000_s04 (KERN_ATTR_ESALT (crc64_t)) * main */ - m28000s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m28000s (s_crc64jonestab, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m28000_s08 (KERN_ATTR_ESALT (crc64_t)) { /** - * base + * modifier */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * CRC64Jones shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 w[16]; w[ 0] = pws[gid].i[ 0]; @@ -514,19 +649,46 @@ KERNEL_FQ void m28000_s08 (KERN_ATTR_ESALT (crc64_t)) * main */ - m28000s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m28000s (s_crc64jonestab, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m28000_s16 (KERN_ATTR_ESALT (crc64_t)) { /** - * base + * modifier */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * CRC64Jones shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u64 s_crc64jonestab[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_crc64jonestab[i] = crc64jonestab[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_crc64jonestab = crc64jonestab; + + #endif if (gid >= gid_max) return; + /** + * Base + */ + u32 w[16]; w[ 0] = pws[gid].i[ 0]; @@ -552,5 +714,5 @@ KERNEL_FQ void m28000_s16 (KERN_ATTR_ESALT (crc64_t)) * main */ - m28000s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); + m28000s (s_crc64jonestab, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); }