diff --git a/OpenCL/m00600_a0-optimized.cl b/OpenCL/m00600_a0-optimized.cl index 6b7009f7d..7b4fdfa1e 100644 --- a/OpenCL/m00600_a0-optimized.cl +++ b/OpenCL/m00600_a0-optimized.cl @@ -20,44 +20,44 @@ #define BLAKE2B_G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ - d = rotr64(d ^ a, 32); \ + d = rotr64 (d ^ a, 32); \ c = c + d; \ - b = rotr64(b ^ c, 24); \ + b = rotr64 (b ^ c, 24); \ a = a + b + m[blake2b_sigma[r][2*i+1]]; \ - d = rotr64(d ^ a, 16); \ + d = rotr64 (d ^ a, 16); \ c = c + d; \ - b = rotr64(b ^ c, 63); \ + b = rotr64 (b ^ c, 63); \ } while(0) -#define BLAKE2B_ROUND(r) \ - do { \ - BLAKE2B_G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - BLAKE2B_G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - BLAKE2B_G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - BLAKE2B_G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - BLAKE2B_G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - BLAKE2B_G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - BLAKE2B_G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - BLAKE2B_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ +#define BLAKE2B_ROUND(r) \ + do { \ + BLAKE2B_G (r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + BLAKE2B_G (r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + BLAKE2B_G (r,2,v[ 2],v[ 6],v[10],v[14]); \ + BLAKE2B_G (r,3,v[ 3],v[ 7],v[11],v[15]); \ + BLAKE2B_G (r,4,v[ 0],v[ 5],v[10],v[15]); \ + BLAKE2B_G (r,5,v[ 1],v[ 6],v[11],v[12]); \ + BLAKE2B_G (r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + BLAKE2B_G (r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) -void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) +void blake2b_transform (u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) { if (isFinal) f[0] = -1; - t[0] += hl32_to_64(0, out_len); + t[0] += hl32_to_64 (0, out_len); - m[0] = hl32_to_64(w0[1], w0[0]); - m[1] = hl32_to_64(w0[3], w0[2]); - m[2] = hl32_to_64(w1[1], w1[0]); - m[3] = hl32_to_64(w1[3], w1[2]); - m[4] = hl32_to_64(w2[1], w2[0]); - m[5] = hl32_to_64(w2[3], w2[2]); - m[6] = hl32_to_64(w3[1], w3[0]); - m[7] = hl32_to_64(w3[3], w3[2]); - m[8] = 0; - m[9] = 0; + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; m[10] = 0; m[11] = 0; m[12] = 0; @@ -82,7 +82,7 @@ void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], v[14] = BLAKE2B_IV_06 ^ f[0]; v[15] = BLAKE2B_IV_07 ^ f[1]; - const u8a blake2b_sigma[12][16] = + const int blake2b_sigma[12][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , @@ -98,31 +98,31 @@ void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } }; - BLAKE2B_ROUND( 0); - BLAKE2B_ROUND( 1); - BLAKE2B_ROUND( 2); - BLAKE2B_ROUND( 3); - BLAKE2B_ROUND( 4); - BLAKE2B_ROUND( 5); - BLAKE2B_ROUND( 6); - BLAKE2B_ROUND( 7); - BLAKE2B_ROUND( 8); - BLAKE2B_ROUND( 9); - BLAKE2B_ROUND(10); - BLAKE2B_ROUND(11); + BLAKE2B_ROUND ( 0); + BLAKE2B_ROUND ( 1); + BLAKE2B_ROUND ( 2); + BLAKE2B_ROUND ( 3); + BLAKE2B_ROUND ( 4); + BLAKE2B_ROUND ( 5); + BLAKE2B_ROUND ( 6); + BLAKE2B_ROUND ( 7); + BLAKE2B_ROUND ( 8); + BLAKE2B_ROUND ( 9); + BLAKE2B_ROUND (10); + BLAKE2B_ROUND (11); - h[0] = h[0] ^ v[0] ^ v[ 8]; - h[1] = h[1] ^ v[1] ^ v[ 9]; - h[2] = h[2] ^ v[2] ^ v[10]; - h[3] = h[3] ^ v[3] ^ v[11]; - h[4] = h[4] ^ v[4] ^ v[12]; - h[5] = h[5] ^ v[5] ^ v[13]; - h[6] = h[6] ^ v[6] ^ v[14]; - h[7] = h[7] ^ v[7] ^ v[15]; + h[0] = h[0] ^ v[0] ^ v[ 8]; + h[1] = h[1] ^ v[1] ^ v[ 9]; + h[2] = h[2] ^ v[2] ^ v[10]; + h[3] = h[3] ^ v[3] ^ v[11]; + h[4] = h[4] ^ v[4] ^ v[12]; + h[5] = h[5] ^ v[5] ^ v[13]; + h[6] = h[6] ^ v[6] ^ v[14]; + h[7] = h[7] ^ v[7] ^ v[15]; } __kernel void m00600_m04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) -{ +{ /** * modifier */ @@ -215,7 +215,7 @@ __kernel void m00600_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru const u32x r3 = l32_from_64(digest[1]); COMPARE_M_SIMD(r0, r1, r2, r3); - } + } } __kernel void m00600_m08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) @@ -227,7 +227,7 @@ __kernel void m00600_m16 (__global pw_t *pws, __constant const kernel_rule_t *ru } __kernel void m00600_s04 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) -{ +{ /** * modifier */ @@ -281,7 +281,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru digests_buf[digests_offset].digest_buf[DGST_R2], digests_buf[digests_offset].digest_buf[DGST_R3] }; - + /** * loop */ @@ -292,7 +292,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru u32x w1[4] = { 0 }; u32x w2[4] = { 0 }; u32x w3[4] = { 0 }; - + const u32x out_len = apply_rules_vect(pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); u64x digest[8]; @@ -334,7 +334,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru const u32x r3 = l32_from_64(digest[1]); COMPARE_S_SIMD(r0, r1, r2, r3); - } + } } __kernel void m00600_s08 (__global pw_t *pws, __constant const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) diff --git a/OpenCL/m00600_a1-optimized.cl b/OpenCL/m00600_a1-optimized.cl index 829093288..a40b10613 100644 --- a/OpenCL/m00600_a1-optimized.cl +++ b/OpenCL/m00600_a1-optimized.cl @@ -20,44 +20,44 @@ #define BLAKE2B_G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ - d = rotr64(d ^ a, 32); \ + d = rotr64 (d ^ a, 32); \ c = c + d; \ - b = rotr64(b ^ c, 24); \ + b = rotr64 (b ^ c, 24); \ a = a + b + m[blake2b_sigma[r][2*i+1]]; \ - d = rotr64(d ^ a, 16); \ + d = rotr64 (d ^ a, 16); \ c = c + d; \ - b = rotr64(b ^ c, 63); \ + b = rotr64 (b ^ c, 63); \ } while(0) -#define BLAKE2B_ROUND(r) \ - do { \ - BLAKE2B_G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - BLAKE2B_G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - BLAKE2B_G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - BLAKE2B_G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - BLAKE2B_G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - BLAKE2B_G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - BLAKE2B_G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - BLAKE2B_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ +#define BLAKE2B_ROUND(r) \ + do { \ + BLAKE2B_G (r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + BLAKE2B_G (r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + BLAKE2B_G (r,2,v[ 2],v[ 6],v[10],v[14]); \ + BLAKE2B_G (r,3,v[ 3],v[ 7],v[11],v[15]); \ + BLAKE2B_G (r,4,v[ 0],v[ 5],v[10],v[15]); \ + BLAKE2B_G (r,5,v[ 1],v[ 6],v[11],v[12]); \ + BLAKE2B_G (r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + BLAKE2B_G (r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) -void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) +void blake2b_transform (u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) { if (isFinal) f[0] = -1; - t[0] += hl32_to_64(0, out_len); + t[0] += hl32_to_64 (0, out_len); - m[0] = hl32_to_64(w0[1], w0[0]); - m[1] = hl32_to_64(w0[3], w0[2]); - m[2] = hl32_to_64(w1[1], w1[0]); - m[3] = hl32_to_64(w1[3], w1[2]); - m[4] = hl32_to_64(w2[1], w2[0]); - m[5] = hl32_to_64(w2[3], w2[2]); - m[6] = hl32_to_64(w3[1], w3[0]); - m[7] = hl32_to_64(w3[3], w3[2]); - m[8] = 0; - m[9] = 0; + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; m[10] = 0; m[11] = 0; m[12] = 0; @@ -82,7 +82,7 @@ void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], v[14] = BLAKE2B_IV_06 ^ f[0]; v[15] = BLAKE2B_IV_07 ^ f[1]; - const u8 blake2b_sigma[12][16] = + const int blake2b_sigma[12][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , @@ -98,31 +98,31 @@ void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } }; - BLAKE2B_ROUND( 0); - BLAKE2B_ROUND( 1); - BLAKE2B_ROUND( 2); - BLAKE2B_ROUND( 3); - BLAKE2B_ROUND( 4); - BLAKE2B_ROUND( 5); - BLAKE2B_ROUND( 6); - BLAKE2B_ROUND( 7); - BLAKE2B_ROUND( 8); - BLAKE2B_ROUND( 9); - BLAKE2B_ROUND(10); - BLAKE2B_ROUND(11); + BLAKE2B_ROUND ( 0); + BLAKE2B_ROUND ( 1); + BLAKE2B_ROUND ( 2); + BLAKE2B_ROUND ( 3); + BLAKE2B_ROUND ( 4); + BLAKE2B_ROUND ( 5); + BLAKE2B_ROUND ( 6); + BLAKE2B_ROUND ( 7); + BLAKE2B_ROUND ( 8); + BLAKE2B_ROUND ( 9); + BLAKE2B_ROUND (10); + BLAKE2B_ROUND (11); - h[0] = h[0] ^ v[0] ^ v[ 8]; - h[1] = h[1] ^ v[1] ^ v[ 9]; - h[2] = h[2] ^ v[2] ^ v[10]; - h[3] = h[3] ^ v[3] ^ v[11]; - h[4] = h[4] ^ v[4] ^ v[12]; - h[5] = h[5] ^ v[5] ^ v[13]; - h[6] = h[6] ^ v[6] ^ v[14]; - h[7] = h[7] ^ v[7] ^ v[15]; + h[0] = h[0] ^ v[0] ^ v[ 8]; + h[1] = h[1] ^ v[1] ^ v[ 9]; + h[2] = h[2] ^ v[2] ^ v[10]; + h[3] = h[3] ^ v[3] ^ v[11]; + h[4] = h[4] ^ v[4] ^ v[12]; + h[5] = h[5] ^ v[5] ^ v[13]; + h[6] = h[6] ^ v[6] ^ v[14]; + h[7] = h[7] ^ v[7] ^ v[15]; } __kernel void m00600_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) -{ +{ /** * modifier */ @@ -286,7 +286,7 @@ __kernel void m00600_m16 (__global pw_t *pws, __global const kernel_rule_t *rule } __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) -{ +{ /** * modifier */ @@ -340,7 +340,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule digests_buf[digests_offset].digest_buf[DGST_R2], digests_buf[digests_offset].digest_buf[DGST_R3] }; - + /** * loop */ @@ -453,7 +453,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32x r3 = l32_from_64(digest[1]); COMPARE_S_SIMD(r0, r1, r2, r3); - } + } } __kernel void m00600_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) diff --git a/OpenCL/m00600_a3-optimized.cl b/OpenCL/m00600_a3-optimized.cl index ca4da7144..90c6cd9f0 100644 --- a/OpenCL/m00600_a3-optimized.cl +++ b/OpenCL/m00600_a3-optimized.cl @@ -10,7 +10,7 @@ #include "inc_hash_functions.cl" #include "inc_types.cl" #include "inc_common.cl" -#include "inc_simd.cl" +#include "inc_simd.cl", #define BLAKE2B_FINAL 1 #define BLAKE2B_UPDATE 0 @@ -18,44 +18,44 @@ #define BLAKE2B_G(r,i,a,b,c,d) \ do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ - d = rotr64(d ^ a, 32); \ + d = rotr64 (d ^ a, 32); \ c = c + d; \ - b = rotr64(b ^ c, 24); \ + b = rotr64 (b ^ c, 24); \ a = a + b + m[blake2b_sigma[r][2*i+1]]; \ - d = rotr64(d ^ a, 16); \ + d = rotr64 (d ^ a, 16); \ c = c + d; \ - b = rotr64(b ^ c, 63); \ + b = rotr64 (b ^ c, 63); \ } while(0) -#define BLAKE2B_ROUND(r) \ - do { \ - BLAKE2B_G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - BLAKE2B_G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - BLAKE2B_G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - BLAKE2B_G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - BLAKE2B_G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - BLAKE2B_G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - BLAKE2B_G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - BLAKE2B_G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ +#define BLAKE2B_ROUND(r) \ + do { \ + BLAKE2B_G (r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + BLAKE2B_G (r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + BLAKE2B_G (r,2,v[ 2],v[ 6],v[10],v[14]); \ + BLAKE2B_G (r,3,v[ 3],v[ 7],v[11],v[15]); \ + BLAKE2B_G (r,4,v[ 0],v[ 5],v[10],v[15]); \ + BLAKE2B_G (r,5,v[ 1],v[ 6],v[11],v[12]); \ + BLAKE2B_G (r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + BLAKE2B_G (r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) -void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) +void blake2b_transform (u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x out_len, const u8 isFinal) { if (isFinal) f[0] = -1; - t[0] += hl32_to_64(0, out_len); + t[0] += hl32_to_64 (0, out_len); - m[0] = hl32_to_64(w0[1], w0[0]); - m[1] = hl32_to_64(w0[3], w0[2]); - m[2] = hl32_to_64(w1[1], w1[0]); - m[3] = hl32_to_64(w1[3], w1[2]); - m[4] = hl32_to_64(w2[1], w2[0]); - m[5] = hl32_to_64(w2[3], w2[2]); - m[6] = hl32_to_64(w3[1], w3[0]); - m[7] = hl32_to_64(w3[3], w3[2]); - m[8] = 0; - m[9] = 0; + m[ 0] = hl32_to_64 (w0[1], w0[0]); + m[ 1] = hl32_to_64 (w0[3], w0[2]); + m[ 2] = hl32_to_64 (w1[1], w1[0]); + m[ 3] = hl32_to_64 (w1[3], w1[2]); + m[ 4] = hl32_to_64 (w2[1], w2[0]); + m[ 5] = hl32_to_64 (w2[3], w2[2]); + m[ 6] = hl32_to_64 (w3[1], w3[0]); + m[ 7] = hl32_to_64 (w3[3], w3[2]); + m[ 8] = 0; + m[ 9] = 0; m[10] = 0; m[11] = 0; m[12] = 0; @@ -80,7 +80,7 @@ void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], v[14] = BLAKE2B_IV_06 ^ f[0]; v[15] = BLAKE2B_IV_07 ^ f[1]; - const u8 blake2b_sigma[12][16] = + const int blake2b_sigma[12][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , @@ -96,31 +96,31 @@ void blake2b_transform(u64x h[8], u64x t[2], u64x f[2], u64x m[16], u64x v[16], { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } }; - BLAKE2B_ROUND( 0); - BLAKE2B_ROUND( 1); - BLAKE2B_ROUND( 2); - BLAKE2B_ROUND( 3); - BLAKE2B_ROUND( 4); - BLAKE2B_ROUND( 5); - BLAKE2B_ROUND( 6); - BLAKE2B_ROUND( 7); - BLAKE2B_ROUND( 8); - BLAKE2B_ROUND( 9); - BLAKE2B_ROUND(10); - BLAKE2B_ROUND(11); + BLAKE2B_ROUND ( 0); + BLAKE2B_ROUND ( 1); + BLAKE2B_ROUND ( 2); + BLAKE2B_ROUND ( 3); + BLAKE2B_ROUND ( 4); + BLAKE2B_ROUND ( 5); + BLAKE2B_ROUND ( 6); + BLAKE2B_ROUND ( 7); + BLAKE2B_ROUND ( 8); + BLAKE2B_ROUND ( 9); + BLAKE2B_ROUND (10); + BLAKE2B_ROUND (11); - h[0] = h[0] ^ v[0] ^ v[ 8]; - h[1] = h[1] ^ v[1] ^ v[ 9]; - h[2] = h[2] ^ v[2] ^ v[10]; - h[3] = h[3] ^ v[3] ^ v[11]; - h[4] = h[4] ^ v[4] ^ v[12]; - h[5] = h[5] ^ v[5] ^ v[13]; - h[6] = h[6] ^ v[6] ^ v[14]; - h[7] = h[7] ^ v[7] ^ v[15]; + h[0] = h[0] ^ v[0] ^ v[ 8]; + h[1] = h[1] ^ v[1] ^ v[ 9]; + h[2] = h[2] ^ v[2] ^ v[10]; + h[3] = h[3] ^ v[3] ^ v[11]; + h[4] = h[4] ^ v[4] ^ v[12]; + h[5] = h[5] ^ v[5] ^ v[13]; + h[6] = h[6] ^ v[6] ^ v[14]; + h[7] = h[7] ^ v[7] ^ v[15]; } __kernel void m00600_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) -{ +{ /** * modifier */ @@ -221,7 +221,7 @@ __kernel void m00600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule COMPARE_M_SIMD(r0, r1, r2, r3); } -} +} __kernel void m00600_m08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) { @@ -232,7 +232,7 @@ __kernel void m00600_m16 (__global pw_t *pws, __global const kernel_rule_t *rule } __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) -{ +{ /** * modifier */ @@ -280,7 +280,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule { const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; const u32x w0x = w0l | w0r; - + u32x w0[4]; u32x w1[4]; u32x w2[4]; @@ -345,7 +345,7 @@ __kernel void m00600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule COMPARE_S_SIMD(r0, r1, r2, r3); } -} +} __kernel void m00600_s08 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const blake2_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) {