diff --git a/OpenCL/m07701_a0-optimized.cl b/OpenCL/m07701_a0-optimized.cl new file mode 100644 index 000000000..7fab20f15 --- /dev/null +++ b/OpenCL/m07701_a0-optimized.cl @@ -0,0 +1,488 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//too much register pressure +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" + +#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) +#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) + +#define SETSHIFTEDINT(a,n,v) \ +{ \ + const u32 s = ((n) & 3) * 8; \ + const u64 x = (u64) (v) << s; \ + (a)[((n)/4)+0] &= ~(0xff << ((n & 3) * 8)); \ + (a)[((n)/4)+0] |= x; \ + (a)[((n)/4)+1] = x >> 32; \ +} + +__constant u32a sapb_trans_tbl[256] = +{ + // first value hack for 0 byte as part of an optimization + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x3f, 0x40, 0x41, 0x50, 0x43, 0x44, 0x45, 0x4b, 0x47, 0x48, 0x4d, 0x4e, 0x54, 0x51, 0x53, 0x46, + 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x56, 0x55, 0x5c, 0x49, 0x5d, 0x4a, + 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x58, 0x5b, 0x59, 0xff, 0x52, + 0x4c, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x57, 0x5e, 0x5a, 0x4f, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + /* bcodeArray: 48 constant filler bytes that walld0rf_magic mixes into the buffer it builds. */ +__constant u32a bcodeArray[48] = +{ + 0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91, + 0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51, + 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d +}; + /* sapb_trans: looks up each of the four bytes packed in 'in' in sapb_trans_tbl and returns the results packed at the same byte positions. */ +DECLSPEC u32 sapb_trans (const u32 in) +{ + u32 out = 0; + + out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; + out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; + out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; + out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; + + return out; +} + /* walld0rf_magic: zeroes t[0..15], then writes bytes into t at increasing positions, interleaving password bytes taken from w0[0..1] (while i1 < pw_len), salt bytes taken from salt_buf0[0..2] (while i3 < salt_len) and bcodeArray filler bytes, and stops once position sum20 is reached. sum20 is 0x20 OR-ed with the sum of the low two bits of the four bytes of a and of byte 1 of b, so it lies in 32..47. The kernels in this file pass the four words of the first MD5 digest as a, b, c, d. Returns sum20. */ +DECLSPEC u32 walld0rf_magic (const u32 w0[4], const u32 pw_len, const u32 salt_buf0[4], const u32 salt_len, const u32 a, const u32 b, const u32 c, const u32 d, u32 t[16]) +{ + t[ 0] = 0; + t[ 1] = 0; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + u32 sum20 = ((a >> 24) & 3) + + ((a >> 16) & 3) + + ((a >> 8) & 3) + + ((a >> 0) & 3) + + ((b >> 8) & 3); + + sum20 |= 0x20; + + const u32 w[2] = { w0[0], w0[1] }; + + const u32 s[3] = { salt_buf0[0], salt_buf0[1], salt_buf0[2] }; + + u32 saved_key[4] = { a, b, c, d }; + + u32 i1 = 0; + u32 i2 = 0; + u32 i3 = 0; + + while (i2 < sum20) +
{ + if (i1 < pw_len) + { + if (GETCHAR (saved_key, 15 - i1) & 1) /* digest byte 15 - i1 is odd: emit an extra filler byte before the password byte */ + { + PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); + + i2++; + + if (i2 == sum20) break; + } + + PUTCHAR (t, i2, GETCHAR (w, i1)); + + i2++; + + if (i2 == sum20) break; + + i1++; + } + + if (i3 < salt_len) + { + PUTCHAR (t, i2, GETCHAR (s, i3)); + + i2++; + + if (i2 == sum20) break; + + i3++; + } + + PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); + + i2++; + i2++; /* the second step skips one position, which keeps the 0 written when t was cleared */ + } + + return sum20; +} + /* m07701_m04: each work-item reads one candidate (pws[gid].i[0..7] and pws[gid].pw_len) and the salt at salt_pos, then iterates il_pos over il_cnt in steps of VECT_SIZE. Work-items with gid >= gid_max return immediately. */ +__kernel void m07701_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len; + + /** + * salt +
*/ + + u32 salt_buf0[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[3] = 0; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + salt_buf0[0] = sapb_trans (salt_buf0[0]); + salt_buf0[1] = sapb_trans (salt_buf0[1]); + salt_buf0[2] = sapb_trans (salt_buf0[2]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + if (out_len > 8) continue; // walld0rf_magic only reads w0[0..1], longer candidates would overflow there + + /** + * SAP + */ + + w0[0] = sapb_trans (w0[0]); + w0[1] = sapb_trans (w0[1]); + + /** + * append salt + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le (s0, s1, s2, s3, out_len); + + const u32 pw_salt_len = out_len + salt_len; + + u32 t[16]; + + t[ 0] = s0[0] | w0[0]; + t[ 1] = s0[1] | w0[1]; + t[ 2] = s0[2]; + t[ 3] = s0[3]; + t[ 4] = s1[0]; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = pw_salt_len * 8; + t[15] = 0; + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, pw_salt_len); + + /** + * md5 + */ + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + /* second round: pass out_len, the length after the rule was applied (the same length the first MD5 above used), not the pre-rule pw_len */ + const u32 sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, digest[0], digest[1], digest[2], digest[3], t); + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, sum20); + + t[14] = sum20 * 8; + t[15] = 0; +
digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 r0 = digest[0] ^ digest[2]; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +__kernel void m07701_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + +__kernel void m07701_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + +__kernel void m07701_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf0[4]; + + salt_buf0[0] 
= salt_bufs[salt_pos].salt_buf[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[3] = 0; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + salt_buf0[0] = sapb_trans (salt_buf0[0]); + salt_buf0[1] = sapb_trans (salt_buf0[1]); + salt_buf0[2] = sapb_trans (salt_buf0[2]); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + if (out_len > 8) continue; // walld0rf_magic only reads w0[0..1], longer candidates would overflow there + + /** + * SAP + */ + + w0[0] = sapb_trans (w0[0]); + w0[1] = sapb_trans (w0[1]); + + /** + * append salt + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le (s0, s1, s2, s3, out_len); + + const u32 pw_salt_len = out_len + salt_len; + + u32 t[16]; + + t[ 0] = s0[0] | w0[0]; + t[ 1] = s0[1] | w0[1]; + t[ 2] = s0[2]; + t[ 3] = s0[3]; + t[ 4] = s1[0]; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = pw_salt_len * 8; + t[15] = 0; + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, pw_salt_len); + + /** + * md5 + */ + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + /* second round: pass out_len, the length after the rule was applied (the same length the first MD5 above used), not the pre-rule pw_len */ + const u32 sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, digest[0],
digest[1], digest[2], digest[3], t); + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, sum20); + + t[14] = sum20 * 8; + t[15] = 0; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 r0 = digest[0] ^ digest[2]; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +__kernel void m07701_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + +__kernel void m07701_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global 
plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} diff --git a/OpenCL/m07701_a1-optimized.cl b/OpenCL/m07701_a1-optimized.cl new file mode 100644 index 000000000..421d76d49 --- /dev/null +++ b/OpenCL/m07701_a1-optimized.cl @@ -0,0 +1,572 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//too much register pressure +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" + +#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) +#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) + +#define SETSHIFTEDINT(a,n,v) \ +{ \ + const u32 s = ((n) & 3) * 8; \ + const u64 x = (u64) (v) << s; \ + (a)[((n)/4)+0] &= ~(0xff << ((n & 3) * 8)); \ + (a)[((n)/4)+0] |= x; \ + (a)[((n)/4)+1] = x >> 32; \ +} + +__constant u32a sapb_trans_tbl[256] = +{ + // first value hack for 0 byte as part of an optimization + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x3f, 0x40, 0x41, 0x50, 0x43, 0x44, 0x45, 0x4b, 0x47, 0x48, 0x4d, 0x4e, 0x54, 0x51, 0x53, 0x46, + 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x56, 0x55, 0x5c, 0x49, 0x5d, 0x4a, + 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x58, 0x5b, 0x59, 0xff, 0x52, + 0x4c, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x57, 0x5e, 0x5a, 0x4f, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +__constant u32a bcodeArray[48] = +{ + 0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91, + 0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51, + 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d +}; + +DECLSPEC u32 sapb_trans (const u32 in) +{ + u32 out = 0; + + out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; + out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; + out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; + out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; + + return out; +} + +DECLSPEC u32 walld0rf_magic (const u32 w0[4], const u32 pw_len, const u32 salt_buf0[4], const u32 salt_len, const u32 a, const u32 b, const u32 c, const u32 d, u32 t[16]) +{ + t[ 0] = 0; + t[ 1] = 0; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + 
t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + u32 sum20 = ((a >> 24) & 3) + + ((a >> 16) & 3) + + ((a >> 8) & 3) + + ((a >> 0) & 3) + + ((b >> 8) & 3); + + sum20 |= 0x20; + + const u32 w[2] = { w0[0], w0[1] }; + + const u32 s[3] = { salt_buf0[0], salt_buf0[1], salt_buf0[2] }; + + u32 saved_key[4] = { a, b, c, d }; + + u32 i1 = 0; + u32 i2 = 0; + u32 i3 = 0; + + while (i2 < sum20) + { + if (i1 < pw_len) + { + if (GETCHAR (saved_key, 15 - i1) & 1) + { + PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); + + i2++; + + if (i2 == sum20) break; + } + + PUTCHAR (t, i2, GETCHAR (w, i1)); + + i2++; + + if (i2 == sum20) break; + + i1++; + } + + if (i3 < salt_len) + { + PUTCHAR (t, i2, GETCHAR (s, i3)); + + i2++; + + if (i2 == sum20) break; + + i3++; + } + + PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); + + i2++; + i2++; + } + + return sum20; +} + /* NOTE(review): this file is m07701_a1-optimized.cl, yet its six kernels are named m07700_* while the a0 variant of the same mode names them m07701_*. This looks like a leftover from the mode 7700 source; confirm which kernel name the host requests before renaming. */ +__kernel void m07700_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + *
modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf0[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[3] = 0; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + salt_buf0[0] = sapb_trans (salt_buf0[0]); + salt_buf0[1] = sapb_trans (salt_buf0[1]); + salt_buf0[2] = sapb_trans (salt_buf0[2]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == 
COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + + if (pw_len > 8) continue; // otherwise it overflows in waldorf function + + /** + * SAP + */ + + w0[0] = sapb_trans (w0[0]); + w0[1] = sapb_trans (w0[1]); + + /** + * append salt + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len); + + const u32 pw_salt_len = pw_len + salt_len; + + u32 t[16]; + + t[ 0] = s0[0] | w0[0]; + t[ 1] = s0[1] | w0[1]; + t[ 2] = s0[2]; + t[ 3] = s0[3]; + t[ 4] = s1[0]; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = pw_salt_len * 8; + t[15] = 0; + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, pw_salt_len); + + /** + * md5 + */ + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, digest[0], digest[1], digest[2], digest[3], t); + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, sum20); + + t[14] = sum20 * 8; + t[15] = 0; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 r0 = digest[0] ^ digest[2]; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +__kernel void m07700_m08 (__global pw_t *pws, __global const kernel_rule_t 
*rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + +__kernel void m07700_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u64 gid_max) +{ +} + +__kernel void m07700_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf0[4]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[3] = 0; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + salt_buf0[0] = sapb_trans (salt_buf0[0]); + salt_buf0[1] = sapb_trans (salt_buf0[1]); + salt_buf0[2] = sapb_trans (salt_buf0[2]); + + /** + * digest + */ + 
+ const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + + if (pw_len > 8) continue; // otherwise it overflows in waldorf function + + /** + * SAP + */ + + w0[0] = sapb_trans (w0[0]); + w0[1] = sapb_trans (w0[1]); + + /** + * append salt + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + 
s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len); + + const u32 pw_salt_len = pw_len + salt_len; + + u32 t[16]; + + t[ 0] = s0[0] | w0[0]; + t[ 1] = s0[1] | w0[1]; + t[ 2] = s0[2]; + t[ 3] = s0[3]; + t[ 4] = s1[0]; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = pw_salt_len * 8; + t[15] = 0; + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, pw_salt_len); + + /** + * md5 + */ + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, digest[0], digest[1], digest[2], digest[3], t); + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, sum20); + + t[14] = sum20 * 8; + t[15] = 0; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 r0 = digest[0] ^ digest[2]; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +__kernel void m07700_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 
/**
 * Empty kernel entry point: the body contains no statements, so launching
 * this kernel performs no work.
 *
 * NOTE(review): the name carries the prefix m07700, while the other kernels
 * added by this patch are named m07701_* / m07801_* (the stub m07700_s08
 * directly before this one has the same prefix). This looks like a
 * copy/paste leftover - confirm which kernel name the host resolves.
 */
__kernel void m07700_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
/**
 * Byte translation table consumed by sapb_trans(): the input byte is the
 * index, the entry is the translated byte.
 *
 * Indices 0x41-0x5a and 0x61-0x7a hold the same values 0x01-0x1a, so both
 * letter ranges translate identically; indices 0x30-0x39 map to 0x35-0x3e.
 * Most remaining entries, including every index >= 0x80, hold 0xff.
 */
__constant u32a sapb_trans_tbl[256] =
{
  // first value hack for 0 byte as part of an optimization
  // (index 0 yields 0x00 instead of 0xff, so zero bytes stay zero)
  0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0x3f, 0x40, 0x41, 0x50, 0x43, 0x44, 0x45, 0x4b, 0x47, 0x48, 0x4d, 0x4e, 0x54, 0x51, 0x53, 0x46,
  0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x56, 0x55, 0x5c, 0x49, 0x5d, 0x4a,
  0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x58, 0x5b, 0x59, 0xff, 0x52,
  0x4c, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x57, 0x5e, 0x5a, 0x4f, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
0x0d +}; + +DECLSPEC u32 sapb_trans (const u32 in) +{ + u32 out = 0; + + out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; + out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; + out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; + out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; + + return out; +} + +DECLSPEC u32 walld0rf_magic (const u32 w0[4], const u32 pw_len, const u32 salt_buf0[4], const u32 salt_len, const u32 a, const u32 b, const u32 c, const u32 d, u32 t[16]) +{ + t[ 0] = 0; + t[ 1] = 0; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + u32 sum20 = ((a >> 24) & 3) + + ((a >> 16) & 3) + + ((a >> 8) & 3) + + ((a >> 0) & 3) + + ((b >> 8) & 3); + + sum20 |= 0x20; + + const u32 w[2] = { w0[0], w0[1] }; + + const u32 s[3] = { salt_buf0[0], salt_buf0[1], salt_buf0[2] }; + + u32 saved_key[4] = { a, b, c, d }; + + u32 i1 = 0; + u32 i2 = 0; + u32 i3 = 0; + + while (i2 < sum20) + { + if (i1 < pw_len) + { + if (GETCHAR (saved_key, 15 - i1) & 1) + { + PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); + + i2++; + + if (i2 == sum20) break; + } + + PUTCHAR (t, i2, GETCHAR (w, i1)); + + i2++; + + if (i2 == sum20) break; + + i1++; + } + + if (i3 < salt_len) + { + PUTCHAR (t, i2, GETCHAR (s, i3)); + + i2++; + + if (i2 == sum20) break; + + i3++; + } + + PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); + + i2++; + i2++; + } + + return sum20; +} + +DECLSPEC void m07701m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, 
__global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + w0[0] = sapb_trans (w0[0]); + w0[1] = sapb_trans (w0[1]); + + /** + * salt + */ + + u32 salt_buf0[3]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + + salt_buf0[0] = sapb_trans (salt_buf0[0]); + salt_buf0[1] = sapb_trans (salt_buf0[1]); + salt_buf0[2] = sapb_trans (salt_buf0[2]); + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + append_0x80_4x4_S (s0, s1, s2, s3, salt_len); + + switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len); + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = sapb_trans (ix_create_bft (bfs_buf, il_pos)); + + const u32x w0lr = w0l | w0r; + + w0[0] = w0lr; + + u32 t[16]; + + t[ 0] = s0[0] | w0[0]; + t[ 1] = s0[1] | w0[1]; + t[ 2] = s0[2]; + t[ 3] = s0[3]; + t[ 4] = s1[0]; + t[ 5] = s1[1]; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 
0; + t[13] = 0; + t[14] = pw_salt_len * 8; + t[15] = 0; + + /** + * md5 + */ + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, digest[0], digest[1], digest[2], digest[3], t); + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, sum20); + + t[14] = sum20 * 8; + t[15] = 0; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 r0 = digest[0] ^ digest[2]; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m07701s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + w0[0] = sapb_trans (w0[0]); + w0[1] = sapb_trans (w0[1]); + + /** + * 
salt + */ + + u32 salt_buf0[3]; + + salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + + salt_buf0[0] = sapb_trans (salt_buf0[0]); + salt_buf0[1] = sapb_trans (salt_buf0[1]); + salt_buf0[2] = sapb_trans (salt_buf0[2]); + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = 0; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + append_0x80_4x4_S (s0, s1, s2, s3, salt_len); + + switch_buffer_by_offset_le (s0, s1, s2, s3, pw_len); + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = sapb_trans (ix_create_bft (bfs_buf, il_pos)); + + const u32x w0lr = w0l | w0r; + + w0[0] = w0lr; + + u32 t[16]; + + t[ 0] = s0[0] | w0[0]; + t[ 1] = s0[1] | w0[1]; + t[ 2] = s0[2]; + t[ 3] = s0[3]; + t[ 4] = s1[0]; + t[ 5] = s1[1]; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = pw_salt_len * 8; + t[15] = 0; + + /** + * md5 + */ + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, digest[0], digest[1], digest[2], digest[3], t); + + append_0x80_4x4_S (t + 0, t + 4, t + 8, t + 12, sum20); + + t[14] = sum20 * 8; + t[15] = 0; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + 
md5_transform (t + 0, t + 4, t + 8, t + 12, digest); + + const u32 r0 = digest[0] ^ digest[2]; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +__kernel void m07701_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07701_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07701_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + +__kernel void m07701_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global 
plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07701_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, 
__global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07701_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const 
u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} diff --git a/OpenCL/m07801_a0-optimized.cl b/OpenCL/m07801_a0-optimized.cl new file mode 100644 index 000000000..c076e5912 --- /dev/null +++ b/OpenCL/m07801_a0-optimized.cl @@ -0,0 +1,562 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//incompatible data-dependant code +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" + +__constant u32a theMagicArray[64] = +{ + 0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f, + 0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194, + 0x9f5bc19b, 0x06059d03, 0x9d5e138a, 0x1e9a6ae8, 0xd97c1417, 0x58c72af6, 0xa199630a, 0xd7fd70c3, + 0xf65e7413, 0x03c90b04, 0x2698f726, 0x8a929325, 0xb0a20d23, 0xed63796d, 0x1332fa3c, 0x35029aa3, + 0xb3dd8e0a, 0x24bf51c3, 0x7ccd559f, 0x37af944c, 0x29085282, 0xb23b4e37, 0x9f170791, 0x113bfdcd, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +DECLSPEC u32 GETSHIFTEDINT_CONST (__constant u32 *a, const int n) +{ + const int d = n / 4; + const int m = n & 3; + + u64 tmp = hl32_to_64_S (a[d + 0], a[d + 1]); + + tmp <<= m * 8; + + return h32_from_64_S (tmp); +} + +DECLSPEC void SETSHIFTEDINT (u32 *a, const int n, const u32 v) +{ + const int d = n / 4; + const int m = n & 3; + + u64 tmp = hl32_to_64_S (v, 0); + + tmp >>= m * 8; + + a[d + 0] |= h32_from_64_S (tmp); + a[d + 1] = l32_from_64_S (tmp); +} + +__kernel void m07801_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = 
pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf[8]; + + salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; + salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * SAP + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf[0]; + s0[1] = salt_buf[1]; + s0[2] = salt_buf[2]; + s0[3] = salt_buf[3]; + s1[0] = salt_buf[4]; + s1[1] = salt_buf[5]; + s1[2] = salt_buf[6]; + s1[3] = salt_buf[7]; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + /** + * sha1 + */ + + u32 final[32]; + + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] | s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] 
| s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); + final[14] = 0; + final[15] = pw_salt_len * 8; + final[16] = 0; + final[17] = 0; + final[18] = 0; + final[19] = 0; + final[20] = 0; + final[21] = 0; + final[22] = 0; + final[23] = 0; + final[24] = 0; + final[25] = 0; + final[26] = 0; + final[27] = 0; + final[28] = 0; + final[29] = 0; + final[30] = 0; + final[31] = 0; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); + + // prepare magic array range + + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; + + lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; + offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; + + // final + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + final[ 0] = swap32_S (w0[0]); + final[ 1] = swap32_S (w0[1]); + final[ 2] = swap32_S (w0[2]); + final[ 3] = swap32_S 
(w0[3]); + final[ 4] = swap32_S (w1[0]); + final[ 5] = swap32_S (w1[1]); + final[ 6] = swap32_S (w1[2]); + final[ 7] = swap32_S (w1[3]); + final[ 8] = 0; + final[ 9] = 0; + final[10] = 0; + final[11] = 0; + final[12] = 0; + final[13] = 0; + final[14] = 0; + final[15] = 0; + + u32 final_len = pw_len; + + u32 i; + + // append MagicArray + + for (i = 0; i < lengthMagicArray - 4; i += 4) + { + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i); + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + const u32 mask = 0xffffffff << (((4 - (lengthMagicArray - i)) & 3) * 8); + + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i) & mask; + + SETSHIFTEDINT (final, final_len + i, tmp); + + final_len += lengthMagicArray; + + // append Salt + + for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 + { + const u32 tmp = swap32_S (salt_buf[i / 4]); // attention, int[] not char[] + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + final_len += salt_len; + + // calculate + + int left; + int off; + + for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) + { + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + } + + final[off + 15] = final_len * 8; + + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + + COMPARE_M_SIMD (0, 0, digest[2] & 0xffff0000, digest[1]); + } +} + +__kernel void m07801_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, 
/**
 * Empty kernel entry point: the body contains no statements, so launching
 * this kernel performs no work.
 *
 * NOTE(review): presumably kept only so that the kernel name exists for the
 * host to resolve - confirm.
 */
__kernel void m07801_m16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
{
}
*bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf[8]; + + salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; + salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + 
u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * SAP + */ + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf[0]; + s0[1] = salt_buf[1]; + s0[2] = salt_buf[2]; + s0[3] = salt_buf[3]; + s1[0] = salt_buf[4]; + s1[1] = salt_buf[5]; + s1[2] = salt_buf[6]; + s1[3] = salt_buf[7]; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + /** + * sha1 + */ + + u32 final[32]; + + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] | s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] | s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); + final[14] = 0; + final[15] = pw_salt_len * 8; + final[16] = 0; + final[17] = 0; + final[18] = 0; + final[19] = 0; + final[20] = 0; + final[21] = 0; + final[22] = 0; + final[23] = 0; + final[24] = 0; + final[25] = 0; + final[26] = 0; + final[27] = 0; + final[28] = 0; + final[29] = 0; + final[30] = 0; + final[31] = 0; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); + + // prepare magic array range + + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; + + lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; + 
lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; + offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; + + // final + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + final[ 0] = swap32_S (w0[0]); + final[ 1] = swap32_S (w0[1]); + final[ 2] = swap32_S (w0[2]); + final[ 3] = swap32_S (w0[3]); + final[ 4] = swap32_S (w1[0]); + final[ 5] = swap32_S (w1[1]); + final[ 6] = swap32_S (w1[2]); + final[ 7] = swap32_S (w1[3]); + final[ 8] = 0; + final[ 9] = 0; + final[10] = 0; + final[11] = 0; + final[12] = 0; + final[13] = 0; + final[14] = 0; + final[15] = 0; + + u32 final_len = pw_len; + + u32 i; + + // append MagicArray + + for (i = 0; i < lengthMagicArray - 4; i += 4) + { + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i); + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + const u32 mask = 0xffffffff << (((4 - (lengthMagicArray - i)) & 3) * 8); + + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i) & mask; + + SETSHIFTEDINT (final, final_len + i, tmp); + + final_len += lengthMagicArray; + + // append Salt + + for (i = 0; i < salt_len + 1; i += 4) // +1 for the 
0x80 + { + const u32 tmp = swap32_S (salt_buf[i / 4]); // attention, int[] not char[] + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + final_len += salt_len; + + // calculate + + int left; + int off; + + for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) + { + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + } + + final[off + 15] = final_len * 8; + + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + + COMPARE_S_SIMD (0, 0, digest[2] & 0xffff0000, digest[1]); + } +} + +__kernel void m07801_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + +__kernel void m07801_s16 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, 
__global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) /* empty body: this kernel entry point performs no work */ +{ +} diff --git a/OpenCL/m07801_a1-optimized.cl b/OpenCL/m07801_a1-optimized.cl new file mode 100644 index 000000000..3e2da3895 --- /dev/null +++ b/OpenCL/m07801_a1-optimized.cl @@ -0,0 +1,680 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//incompatible data-dependant code +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" + /* theMagicArray: 64 constant words; the first 40 are fixed non-zero values and the last 24 are zero. The kernels read a digest-dependent byte window from it through GETSHIFTEDINT_CONST. */ +__constant u32a theMagicArray[64] = +{ + 0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f, + 0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194, + 0x9f5bc19b, 0x06059d03, 0x9d5e138a, 0x1e9a6ae8, 0xd97c1417, 0x58c72af6, 0xa199630a, 0xd7fd70c3, + 0xf65e7413, 0x03c90b04, 0x2698f726, 0x8a929325, 0xb0a20d23, 0xed63796d, 0x1332fa3c, 0x35029aa3, + 0xb3dd8e0a, 0x24bf51c3, 0x7ccd559f, 0x37af944c, 0x29085282, 0xb23b4e37, 0x9f170791, 0x113bfdcd, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + /* GETSHIFTEDINT_CONST: returns a 32-bit value read at byte position n of the constant word array a. It combines a[n / 4] and a[n / 4 + 1] into one 64-bit value, shifts it left by (n & 3) * 8 bits and returns h32_from_64_S of the result (presumably the upper half - TODO confirm). It always reads one word past a[n / 4], so that word must exist. */ +DECLSPEC u32 GETSHIFTEDINT_CONST (__constant u32 *a, const int n) +{ + const int d = n / 4; + const int m = n & 3; + + u64 tmp = hl32_to_64_S (a[d + 0], a[d + 1]); + + tmp <<= m * 8; + + return h32_from_64_S (tmp); +} + /* SETSHIFTEDINT: stores v at byte position n of word array a. v is widened to 64 bits via hl32_to_64_S (v, 0), shifted right by (n & 3) * 8 bits, then one half is OR-ed into a[n / 4] and the other half overwrites a[n / 4 + 1]. Because the first word is OR-ed, its target bits must already be zero; because the second word is assigned, anything previously stored there is lost. */ +DECLSPEC void SETSHIFTEDINT (u32 *a, const int n, const u32 v) +{ + const int d = n / 4; + const int m = n & 3; + + u64 tmp = hl32_to_64_S (v, 0); + + tmp >>= m * 8; + + a[d + 0] |= h32_from_64_S (tmp); + a[d + 1] = l32_from_64_S (tmp); +} + /* m07800_m04: combines the left word pws[gid] with each right word from combs_buf, then runs the same two-stage SHA-1 construction as described at the individual steps and ends in COMPARE_M_SIMD. NOTE(review): the name uses the prefix m07800_ although the diff header earlier on this span places the kernel in m07801_a1-optimized.cl - confirm the intended prefix. */ +__kernel void m07800_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + /* work items beyond gid_max have nothing to do */ + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3]
= pws[gid].i[7]; + /* pw_l_len: length of the word stored in pws[gid], used as the left-hand length below */ + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf[8]; + + salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; + salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + /* the right-hand length comes from combs_buf; the candidate length is the sum of both halves */ + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + /* depending on combs_mode one of the two halves is passed through switch_buffer_by_offset_le_VV with the other half's length, after which both halves are merged with a plain OR; presumably the helper moves that half behind the other one - TODO confirm against its definition */ + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1];
+ w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * SAP + */ + /* private copy of the salt, passed through switch_buffer_by_offset_le_VV with pw_len so it can be OR-ed with the candidate words in the first SHA-1 block */ + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf[0]; + s0[1] = salt_buf[1]; + s0[2] = salt_buf[2]; + s0[3] = salt_buf[3]; + s1[0] = salt_buf[4]; + s1[1] = salt_buf[5]; + s1[2] = salt_buf[6]; + s1[3] = salt_buf[7]; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + /** + * sha1 + */ + /* stage 1 input: words 0..13 are the byte-swapped candidate OR salt, word 14 is zero, word 15 is the length in bits; words 16..31 are zeroed for stage 2 */ + u32 final[32]; + + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] | s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] | s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); + final[14] = 0; + final[15] = pw_salt_len * 8; + final[16] = 0; + final[17] = 0; + final[18] = 0; + final[19] = 0; + final[20] = 0; + final[21] = 0; + final[22] = 0; + final[23] = 0; + final[24] = 0; + final[25] = 0; + final[26] = 0; + final[27] = 0; + final[28] = 0; + final[29] = 0; + final[30] = 0; + final[31] = 0; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + +
sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); + /* lengthMagicArray = 0x20 plus (byte % 6) for each of the first ten digest bytes, so 32..82; offsetMagicArray = sum of (byte % 8) over the last ten digest bytes, so 0..70 */ + // prepare magic array range + + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; + + lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; + offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; + /* stage 2: reset the SHA-1 state and rebuild the buffer as candidate only (words 0..13), with words 14 and 15 cleared */ + // final + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + final[ 0] = swap32_S (w0[0]); + final[ 1] = swap32_S (w0[1]); + final[ 2] = swap32_S (w0[2]); + final[ 3] = swap32_S (w0[3]); + final[ 4] = swap32_S (w1[0]); + final[ 5] = swap32_S (w1[1]); + final[ 6] = swap32_S (w1[2]); + final[ 7] = swap32_S (w1[3]); + final[ 8] = swap32_S (w2[0]); + final[ 9] = swap32_S (w2[1]); + final[10] = swap32_S (w2[2]); + final[11] = swap32_S (w2[3]); + final[12] = swap32_S (w3[0]); + final[13] = swap32_S (w3[1]); + final[14] = 0; + final[15] = 0; + /* the magic window is appended directly behind the combined candidate */ + u32 final_len = pw_len; + + u32 i; + + // append MagicArray + + for (i = 0; i < lengthMagicArray - 4; i += 4) + { + const u32 tmp =
GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i); + + SETSHIFTEDINT (final, final_len + i, tmp); + } + /* mask keeps only the leading (lengthMagicArray - i) bytes of the last 4-byte group; when exactly four bytes remain the shift count is 0 and nothing is cleared */ + const u32 mask = 0xffffffff << (((4 - (lengthMagicArray - i)) & 3) * 8); + + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i) & mask; + + SETSHIFTEDINT (final, final_len + i, tmp); + + final_len += lengthMagicArray; + + // append Salt + + for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 + { + const u32 tmp = swap32_S (salt_buf[i / 4]); // attention, int[] not char[] + + SETSHIFTEDINT (final, final_len + i, tmp); + } + /* final_len counts the salt bytes but not the extra byte covered by the loop bound above */ + final_len += salt_len; + + // calculate + + int left; + int off; + /* Every iteration hashes one full 64-byte block while 56 or more bytes remain. NOTE(review): final has 32 words; if final_len reaches 120 the loop runs twice, off becomes 32 and final[off + 15] below indexes past the array - confirm that the host-side length limits rule this out. */ + for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) + { + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + } + /* the last block gets the total length in bits in its final word */ + final[off + 15] = final_len * 8; + + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + /* only the upper 16 bits of digest[2] and all of digest[1] are passed on for comparison */ + COMPARE_M_SIMD (0, 0, digest[2] & 0xffff0000, digest[1]); + } +} + /* m07800_m08: empty stub kernel whose parameter list continues on the next physical line. NOTE(review): prefix m07800_ inside m07801_a1-optimized.cl - confirm the intended name. */ +__kernel void m07800_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt,
const u32 digests_offset, const u32 combs_mode, const u64 gid_max) /* empty body: this kernel entry point performs no work */ +{ +} + /* m07800_m16: empty stub kernel */ +__kernel void m07800_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + /* m07800_s04: combines the left word pws[gid] with each right word from combs_buf, hashes candidate OR shifted salt with one SHA-1 block, derives a byte window of theMagicArray from that digest, hashes candidate, window and salt with SHA-1 again and passes digest[2] & 0xffff0000 and digest[1] to COMPARE_S_SIMD. Work items with gid >= gid_max return immediately. NOTE(review): the kernels on this span use the prefix m07800_ although the surrounding patch places them in m07801_a1-optimized.cl - confirm the intended prefix. */ +__kernel void m07800_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + /* copy the first eight 32-bit words of the left word into private arrays */ + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len; + + /** + * salt + */ + + u32 salt_buf[8]; + + salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; + salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; + salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + /** + * digest + */ + /* search[] holds the DGST_R0..DGST_R3 words of the digest at digests_offset; it is not referenced by name below and is presumably consumed inside COMPARE_S_SIMD - TODO confirm */ + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + /* the right-hand length comes from combs_buf; the candidate length is the sum of both halves */ + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); + + const u32x pw_len = pw_l_len + pw_r_len; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0
}; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + /* depending on combs_mode one of the two halves is passed through switch_buffer_by_offset_le_VV with the other half's length, after which both halves are merged with a plain OR; presumably the helper moves that half behind the other one - TODO confirm against its definition */ + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * SAP + */ + /* private copy of the salt, passed through switch_buffer_by_offset_le_VV with pw_len so it can be OR-ed with the candidate words in the first SHA-1 block */ + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf[0]; + s0[1] = salt_buf[1]; + s0[2] = salt_buf[2]; + s0[3] = salt_buf[3]; + s1[0] = salt_buf[4]; + s1[1] = salt_buf[5]; + s1[2] = salt_buf[6]; + s1[3] = salt_buf[7]; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + /** + * sha1 + */ + /* stage 1 input: words 0..13 are the byte-swapped candidate OR salt, word 14 is zero, word 15 is the length in bits; words 16..31 are zeroed for stage 2 */ + u32 final[32]; + + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] |
s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] | s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); + final[14] = 0; + final[15] = pw_salt_len * 8; + final[16] = 0; + final[17] = 0; + final[18] = 0; + final[19] = 0; + final[20] = 0; + final[21] = 0; + final[22] = 0; + final[23] = 0; + final[24] = 0; + final[25] = 0; + final[26] = 0; + final[27] = 0; + final[28] = 0; + final[29] = 0; + final[30] = 0; + final[31] = 0; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); + /* lengthMagicArray = 0x20 plus (byte % 6) for each of the first ten digest bytes, so 32..82; offsetMagicArray = sum of (byte % 8) over the last ten digest bytes, so 0..70 */ + // prepare magic array range + + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; + + lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; + offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; +
offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; + /* stage 2: reset the SHA-1 state and rebuild the buffer as candidate only (words 0..13), with words 14 and 15 cleared */ + // final + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + final[ 0] = swap32_S (w0[0]); + final[ 1] = swap32_S (w0[1]); + final[ 2] = swap32_S (w0[2]); + final[ 3] = swap32_S (w0[3]); + final[ 4] = swap32_S (w1[0]); + final[ 5] = swap32_S (w1[1]); + final[ 6] = swap32_S (w1[2]); + final[ 7] = swap32_S (w1[3]); + final[ 8] = swap32_S (w2[0]); + final[ 9] = swap32_S (w2[1]); + final[10] = swap32_S (w2[2]); + final[11] = swap32_S (w2[3]); + final[12] = swap32_S (w3[0]); + final[13] = swap32_S (w3[1]); + final[14] = 0; + final[15] = 0; + + u32 final_len = pw_len; + + u32 i; + + // append MagicArray + + for (i = 0; i < lengthMagicArray - 4; i += 4) + { + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i); + + SETSHIFTEDINT (final, final_len + i, tmp); + } + /* mask keeps only the leading (lengthMagicArray - i) bytes of the last 4-byte group; when exactly four bytes remain the shift count is 0 and nothing is cleared */ + const u32 mask = 0xffffffff << (((4 - (lengthMagicArray - i)) & 3) * 8); + + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i) & mask; + + SETSHIFTEDINT (final, final_len + i, tmp); + + final_len += lengthMagicArray; + + // append Salt + + for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 + { + const u32 tmp = swap32_S (salt_buf[i / 4]); // attention, int[] not char[] + + SETSHIFTEDINT (final, final_len + i, tmp); + } + /* final_len counts the salt bytes but not the extra byte covered by the loop bound above */ + final_len += salt_len; + + // calculate + + int left; + int off; + /* Every iteration hashes one full 64-byte block while 56 or more bytes remain. NOTE(review): final has 32 words; if final_len reaches 120 the loop runs twice, off becomes 32 and final[off + 15] below indexes past the array - confirm that the host-side length limits rule this out. */ + for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) + { + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + } + /* the last block gets the total length in bits in its final word */ + final[off + 15] = final_len * 8; + + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + /* only the upper 16 bits of digest[2] and all of digest[1] are passed on for comparison */ + COMPARE_S_SIMD (0, 0, digest[2] & 0xffff0000, digest[1]); + } +} + /* m07800_s08: empty stub kernel whose parameter list continues on the next physical line */ +__kernel void m07800_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global
const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) /* empty body: this kernel entry point performs no work */ +{ +} + /* m07800_s16: empty stub kernel. NOTE(review): prefix m07800_ although the surrounding patch places this kernel in m07801_a1-optimized.cl - confirm the intended name. */ +__kernel void m07800_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} diff --git
a/OpenCL/m07801_a3-optimized.cl b/OpenCL/m07801_a3-optimized.cl new file mode 100644 index 000000000..a4fac21a6 --- /dev/null +++ b/OpenCL/m07801_a3-optimized.cl @@ -0,0 +1,716 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//incompatible data-dependant code +//#define NEW_SIMD_CODE + +#include "inc_vendor.cl" +#include "inc_hash_constants.h" +#include "inc_hash_functions.cl" +#include "inc_types.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" + /* theMagicArray: 64 constant words; the first 40 are fixed non-zero values and the last 24 are zero. It is declared __constant and matches the parameter type of GETSHIFTEDINT_CONST below. */ +__constant u32a theMagicArray[64] = +{ + 0x91ac5114, 0x9f675443, 0x24e73be0, 0x28747bc2, 0x863313eb, 0x5a4fcb5c, 0x080a7337, 0x0e5d1c2f, + 0x338fe6e5, 0xf89baedd, 0x16f24b8d, 0x2ce1d4dc, 0xb0cbdf9d, 0xd4706d17, 0xf94d423f, 0x9b1b1194, + 0x9f5bc19b, 0x06059d03, 0x9d5e138a, 0x1e9a6ae8, 0xd97c1417, 0x58c72af6, 0xa199630a, 0xd7fd70c3, + 0xf65e7413, 0x03c90b04, 0x2698f726, 0x8a929325, 0xb0a20d23, 0xed63796d, 0x1332fa3c, 0x35029aa3, + 0xb3dd8e0a, 0x24bf51c3, 0x7ccd559f, 0x37af944c, 0x29085282, 0xb23b4e37, 0x9f170791, 0x113bfdcd, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + /* GETSHIFTEDINT_CONST: returns a 32-bit value read at byte position n of the constant word array a. It combines a[n / 4] and a[n / 4 + 1] into one 64-bit value, shifts it left by (n & 3) * 8 bits and returns h32_from_64_S of the result (presumably the upper half - TODO confirm). It always reads one word past a[n / 4], so that word must exist. */ +DECLSPEC u32 GETSHIFTEDINT_CONST (__constant u32 *a, const int n) +{ + const int d = n / 4; + const int m = n & 3; + + u64 tmp = hl32_to_64_S (a[d + 0], a[d + 1]); + + tmp <<= m * 8; + + return h32_from_64_S (tmp); +} + /* SETSHIFTEDINT: stores v at byte position n of word array a. v is widened to 64 bits via hl32_to_64_S (v, 0), shifted right by (n & 3) * 8 bits, then one half is OR-ed into a[n / 4] and the other half overwrites a[n / 4 + 1]. Because the first word is OR-ed, its target bits must already be zero; because the second word is assigned, anything previously stored there is lost. */ +DECLSPEC void SETSHIFTEDINT (u32 *a, const int n, const u32 v) +{ + const int d = n / 4; + const int m = n & 3; + + u64 tmp = hl32_to_64_S (v, 0); + + tmp >>= m * 8; + + a[d + 0] |= h32_from_64_S (tmp); + a[d + 1] = l32_from_64_S (tmp); +} + +DECLSPEC void m07801m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global
const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf[8]; + + salt_buf[0] = swap32_S (salt_bufs[salt_pos].salt_buf[0]); + salt_buf[1] = swap32_S (salt_bufs[salt_pos].salt_buf[1]); + salt_buf[2] = swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf[3] = swap32_S (salt_bufs[salt_pos].salt_buf[3]); + salt_buf[4] = swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[5] = swap32_S (salt_bufs[salt_pos].salt_buf[5]); + salt_buf[6] = swap32_S (salt_bufs[salt_pos].salt_buf[6]); + salt_buf[7] = swap32_S (salt_bufs[salt_pos].salt_buf[7]); + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf[0]; + s0[1] = salt_buf[1]; + s0[2] = salt_buf[2]; + s0[3] = salt_buf[3]; + s1[0] = salt_buf[4]; + s1[1] = salt_buf[5]; + s1[2] = salt_buf[6]; + s1[3] = salt_buf[7]; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] = 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_be_S (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len 
+ salt_len; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + w0[0] = w0lr; + + /** + * SAP + */ + + u32 final[32]; + + final[ 0] = w0[0] | s0[0]; + final[ 1] = w0[1] | s0[1]; + final[ 2] = w0[2] | s0[2]; + final[ 3] = w0[3] | s0[3]; + final[ 4] = w1[0] | s1[0]; + final[ 5] = w1[1] | s1[1]; + final[ 6] = w1[2] | s1[2]; + final[ 7] = w1[3] | s1[3]; + final[ 8] = w2[0] | s2[0]; + final[ 9] = w2[1] | s2[1]; + final[10] = w2[2] | s2[2]; + final[11] = w2[3] | s2[3]; + final[12] = w3[0] | s3[0]; + final[13] = w3[1] | s3[1]; + final[14] = 0; + final[15] = pw_salt_len * 8; + final[16] = 0; + final[17] = 0; + final[18] = 0; + final[19] = 0; + final[20] = 0; + final[21] = 0; + final[22] = 0; + final[23] = 0; + final[24] = 0; + final[25] = 0; + final[26] = 0; + final[27] = 0; + final[28] = 0; + final[29] = 0; + final[30] = 0; + final[31] = 0; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); + + // prepare magic array range + + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; + + lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; + offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; + 
offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; + + // final + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + final[ 0] = w0[0]; + final[ 1] = w0[1]; + final[ 2] = w0[2]; + final[ 3] = w0[3]; + final[ 4] = w1[0]; + final[ 5] = w1[1]; + final[ 6] = w1[2]; + final[ 7] = w1[3]; + final[ 8] = 0; + final[ 9] = 0; + final[10] = 0; + final[11] = 0; + final[12] = 0; + final[13] = 0; + final[14] = 0; + final[15] = 0; + + u32 final_len = pw_len; + + u32 i; + + // append MagicArray + + for (i = 0; i < lengthMagicArray - 4; i += 4) + { + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i); + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + const u32 mask = 0xffffffff << (((4 - (lengthMagicArray - i)) & 3) * 8); + + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i) & mask; + + SETSHIFTEDINT (final, final_len + i, tmp); + + final_len += lengthMagicArray; + + // append Salt + + for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 + { + const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + final_len += salt_len; + + // calculate + + int left; + int off; + + for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) + { + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + } + + final[off + 15] = final_len * 8; + + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + + COMPARE_M_SIMD (0, 0, digest[2] & 0xffff0000, digest[1]); + } +} + +DECLSPEC void m07801s (u32 w0[4], u32 w1[4], u32 
w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf[8]; + + salt_buf[0] = swap32_S (salt_bufs[salt_pos].salt_buf[0]); + salt_buf[1] = swap32_S (salt_bufs[salt_pos].salt_buf[1]); + salt_buf[2] = swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf[3] = swap32_S (salt_bufs[salt_pos].salt_buf[3]); + salt_buf[4] = swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[5] = swap32_S (salt_bufs[salt_pos].salt_buf[5]); + salt_buf[6] = swap32_S (salt_bufs[salt_pos].salt_buf[6]); + salt_buf[7] = swap32_S (salt_bufs[salt_pos].salt_buf[7]); + + const u32 salt_len = salt_bufs[salt_pos].salt_len; + + u32 s0[4]; + u32 s1[4]; + u32 s2[4]; + u32 s3[4]; + + s0[0] = salt_buf[0]; + s0[1] = salt_buf[1]; + s0[2] = salt_buf[2]; + s0[3] = salt_buf[3]; + s1[0] = salt_buf[4]; + s1[1] = salt_buf[5]; + s1[2] = salt_buf[6]; + s1[3] = salt_buf[7]; + s2[0] = 0; + s2[1] = 0; + s2[2] = 0; + s2[3] = 0; + s3[0] 
= 0; + s3[1] = 0; + s3[2] = 0; + s3[3] = 0; + + switch_buffer_by_offset_be_S (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[digests_offset].digest_buf[DGST_R2], + digests_buf[digests_offset].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + w0[0] = w0lr; + + /** + * SAP + */ + + u32 final[32]; + + final[ 0] = w0[0] | s0[0]; + final[ 1] = w0[1] | s0[1]; + final[ 2] = w0[2] | s0[2]; + final[ 3] = w0[3] | s0[3]; + final[ 4] = w1[0] | s1[0]; + final[ 5] = w1[1] | s1[1]; + final[ 6] = w1[2] | s1[2]; + final[ 7] = w1[3] | s1[3]; + final[ 8] = w2[0] | s2[0]; + final[ 9] = w2[1] | s2[1]; + final[10] = w2[2] | s2[2]; + final[11] = w2[3] | s2[3]; + final[12] = w3[0] | s3[0]; + final[13] = w3[1] | s3[1]; + final[14] = 0; + final[15] = pw_salt_len * 8; + final[16] = 0; + final[17] = 0; + final[18] = 0; + final[19] = 0; + final[20] = 0; + final[21] = 0; + final[22] = 0; + final[23] = 0; + final[24] = 0; + final[25] = 0; + final[26] = 0; + final[27] = 0; + final[28] = 0; + final[29] = 0; + final[30] = 0; + final[31] = 0; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); + + // prepare magic array range + + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; + + lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; + lengthMagicArray += 
((digest[1] >> 16) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; + lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; + lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; + offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; + offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; + + // final + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + final[ 0] = w0[0]; + final[ 1] = w0[1]; + final[ 2] = w0[2]; + final[ 3] = w0[3]; + final[ 4] = w1[0]; + final[ 5] = w1[1]; + final[ 6] = w1[2]; + final[ 7] = w1[3]; + final[ 8] = 0; + final[ 9] = 0; + final[10] = 0; + final[11] = 0; + final[12] = 0; + final[13] = 0; + final[14] = 0; + final[15] = 0; + + u32 final_len = pw_len; + + u32 i; + + // append MagicArray + + for (i = 0; i < lengthMagicArray - 4; i += 4) + { + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i); + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + const u32 mask = 0xffffffff << (((4 - (lengthMagicArray - i)) & 3) * 8); + + const u32 tmp = GETSHIFTEDINT_CONST (theMagicArray, offsetMagicArray + i) & mask; + + SETSHIFTEDINT (final, final_len + i, tmp); + + final_len += lengthMagicArray; + + // append Salt + + for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 + { + const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] + + SETSHIFTEDINT (final, final_len + i, tmp); + } + + final_len += salt_len; + + // calculate + + int left; + int off; + + for (left = 
final_len, off = 0; left >= 56; left -= 64, off += 16) + { + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + } + + final[off + 15] = final_len * 8; + + sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); + + COMPARE_S_SIMD (0, 0, digest[2] & 0xffff0000, digest[1]); + } +} + +__kernel void m07801_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07801m (w0, w1, w2, w3, 
pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07801_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + 
w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07801_m16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} + +__kernel void m07801_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, 
__global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07801_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 
*bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * modifier + */ + + //const u64 lid = get_local_id (0); + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len; + + /** + * main + */ + + m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); +} + +__kernel void m07801_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t 
*combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) +{ +} diff --git a/include/interface.h b/include/interface.h index 29c582d47..a00009b6e 100644 --- a/include/interface.h +++ b/include/interface.h @@ -1587,7 +1587,9 @@ typedef enum kern_type KERN_TYPE_SHA256CRYPT = 7400, KERN_TYPE_KRB5PA = 7500, KERN_TYPE_SAPB = 7700, + KERN_TYPE_SAPB_MANGLED = 7701, KERN_TYPE_SAPG = 7800, + KERN_TYPE_SAPG_MANGLED = 7801, KERN_TYPE_DRUPAL7 = 7900, KERN_TYPE_SYBASEASE = 8000, KERN_TYPE_NETSCALER = 8100, diff --git a/src/interface.c b/src/interface.c index f8ebc578e..b6a0a2722 100644 --- a/src/interface.c +++ b/src/interface.c @@ -173,7 +173,9 @@ static const char *ST_HASH_07300 = "34373437353333363838313532323234333833333032 static const char *ST_HASH_07400 = "$5$7777657035274252$XftMj84MW.New1/ViLY5V4CM4Y7EBvfETaZsCW9vcJ8"; static const char *ST_HASH_07500 = "$krb5pa$23$user$realm$salt$5cbb0c882a2b26956e81644edbdb746326f4f5f0e947144fb3095dffe4b4b03e854fc1d631323632303636373330383333353630"; static const char *ST_HASH_07700 = "027642760180$77EC38630C08DF8D"; +static const char *ST_HASH_07701 = 
"027642760180$77EC386300000000"; static const char *ST_HASH_07800 = "604020408266$32837BA7B97672BA4E5AC74767A4E6E1AE802651"; +static const char *ST_HASH_07801 = "604020408266$32837BA7B97672BA4E5A00000000000000000000"; static const char *ST_HASH_07900 = "$S$C20340258nzjDWpoQthrdNTR02f0pmev0K/5/Nx80WSkOQcPEQRh"; static const char *ST_HASH_08000 = "0xc0071808773188715731b69bd4e310b4129913aaf657356c5bdf3c46f249ed42477b5c74af6eaac4d15a"; static const char *ST_HASH_08100 = "1130725275da09ca13254957f2314a639818d44c37ef6d558"; @@ -423,7 +425,9 @@ static const char *HT_07300 = "IPMI2 RAKP HMAC-SHA1"; static const char *HT_07400 = "sha256crypt $5$, SHA256 (Unix)"; static const char *HT_07500 = "Kerberos 5 AS-REQ Pre-Auth etype 23"; static const char *HT_07700 = "SAP CODVN B (BCODE)"; +static const char *HT_07701 = "SAP CODVN B (BCODE) mangled from RFC_READ_TABLE"; static const char *HT_07800 = "SAP CODVN F/G (PASSCODE)"; +static const char *HT_07801 = "SAP CODVN F/G (PASSCODE) mangled from RFC_READ_TABLE"; static const char *HT_07900 = "Drupal7"; static const char *HT_08000 = "Sybase ASE"; static const char *HT_08100 = "Citrix NetScaler"; @@ -16795,7 +16799,9 @@ const char *strhashtype (const u32 hash_mode) case 7400: return HT_07400; case 7500: return HT_07500; case 7700: return HT_07700; + case 7701: return HT_07701; case 7800: return HT_07800; + case 7801: return HT_07801; case 7900: return HT_07900; case 8000: return HT_08000; case 8100: return HT_08100; @@ -18374,14 +18380,14 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const size_t out_le (char *) krb5pa->salt, data); } - else if (hash_mode == 7700) + else if ((hash_mode == 7700) || (hash_mode == 7701)) { snprintf (out_buf, out_len - 1, "%s$%08X%08X", (char *) salt.salt_buf, digest_buf[0], digest_buf[1]); } - else if (hash_mode == 7800) + else if ((hash_mode == 7800) || (hash_mode == 7801)) { snprintf (out_buf, out_len - 1, "%s$%08X%08X%08X%08X%08X", (char *) salt.salt_buf, @@ -23572,6 +23578,26 @@ 
int hashconfig_init (hashcat_ctx_t *hashcat_ctx) hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN; break; + case 7701: hashconfig->hash_type = HASH_TYPE_SAPB; + hashconfig->salt_type = SALT_TYPE_EMBEDDED; + hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL; + hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_PT_UPPER + | OPTS_TYPE_ST_UPPER; + hashconfig->kern_type = KERN_TYPE_SAPB_MANGLED; + hashconfig->dgst_size = DGST_SIZE_4_4; // originally DGST_SIZE_4_2 + hashconfig->parse_func = sapb_parse_hash; + hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PRECOMPUTE_INIT + | OPTI_TYPE_NOT_ITERATED; + hashconfig->dgst_pos0 = 0; + hashconfig->dgst_pos1 = 1; + hashconfig->dgst_pos2 = 2; + hashconfig->dgst_pos3 = 3; + hashconfig->st_hash = ST_HASH_07701; + hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN; + break; + case 7800: hashconfig->hash_type = HASH_TYPE_SAPG; hashconfig->salt_type = SALT_TYPE_EMBEDDED; hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL; @@ -23592,6 +23618,26 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx) hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN; break; + case 7801: hashconfig->hash_type = HASH_TYPE_SAPG; + hashconfig->salt_type = SALT_TYPE_EMBEDDED; + hashconfig->attack_exec = ATTACK_EXEC_INSIDE_KERNEL; + hashconfig->opts_type = OPTS_TYPE_PT_GENERATE_BE + | OPTS_TYPE_ST_ADD80 + | OPTS_TYPE_ST_UPPER; + hashconfig->kern_type = KERN_TYPE_SAPG_MANGLED; + hashconfig->dgst_size = DGST_SIZE_4_5; + hashconfig->parse_func = sapg_parse_hash; + hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_PRECOMPUTE_INIT + | OPTI_TYPE_NOT_ITERATED; + hashconfig->dgst_pos0 = 3; + hashconfig->dgst_pos1 = 4; + hashconfig->dgst_pos2 = 2; + hashconfig->dgst_pos3 = 1; + hashconfig->st_hash = ST_HASH_07801; + hashconfig->st_pass = ST_PASS_HASHCAT_PLAIN; + break; + case 7900: hashconfig->hash_type = HASH_TYPE_SHA512; hashconfig->salt_type = SALT_TYPE_EMBEDDED; hashconfig->attack_exec = ATTACK_EXEC_OUTSIDE_KERNEL; diff --git a/src/usage.c 
b/src/usage.c index 34f938e0f..b0b61cadc 100644 --- a/src/usage.c +++ b/src/usage.c @@ -285,7 +285,9 @@ static const char *const USAGE_BIG[] = " 9900 | Radmin2 | Operating Systems", " 125 | ArubaOS | Operating Systems", " 7700 | SAP CODVN B (BCODE) | Enterprise Application Software (EAS)", + " 7701 | SAP CODVN B (BCODE) via RFC_READ_TABLE | Enterprise Application Software (EAS)", " 7800 | SAP CODVN F/G (PASSCODE) | Enterprise Application Software (EAS)", + " 7801 | SAP CODVN F/G (PASSCODE) via RFC_READ_TABLE | Enterprise Application Software (EAS)", " 10300 | SAP CODVN H (PWDSALTEDHASH) iSSHA-1 | Enterprise Application Software (EAS)", " 8600 | Lotus Notes/Domino 5 | Enterprise Application Software (EAS)", " 8700 | Lotus Notes/Domino 6 | Enterprise Application Software (EAS)",