From a9b204c91d70f824ee0b8c0301f6f615895e88a4 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 4 Feb 2016 22:09:21 +0100 Subject: [PATCH] Converted to new SIMD: -m 11500 -a 0 --- OpenCL/m11500_a0.cl | 192 +++++++++++++++++--------------------------- 1 file changed, 72 insertions(+), 120 deletions(-) diff --git a/OpenCL/m11500_a0.cl b/OpenCL/m11500_a0.cl index 3b59107db..c69dac69d 100644 --- a/OpenCL/m11500_a0.cl +++ b/OpenCL/m11500_a0.cl @@ -5,6 +5,8 @@ #define _CRC32_ +#define NEW_SIMD_CODE + #include "include/constants.h" #include "include/kernel_vendor.h" @@ -18,9 +20,7 @@ #include "OpenCL/common.c" #include "include/rp_kernel.h" #include "OpenCL/rp.c" - -#define COMPARE_S "OpenCL/check_single_comp4.c" -#define COMPARE_M "OpenCL/check_multi_comp4.c" +#include "OpenCL/simd.c" __constant u32 crc32tab[0x100] = { @@ -90,11 +90,11 @@ __constant u32 crc32tab[0x100] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -static u32 round_crc32 (u32 a, const u32 v) +static u32x round_crc32 (u32x a, const u32x v) { - const u32 k = (a ^ v) & 0xff; + const u32x k = (a ^ v) & 0xff; - const u32 s = a >> 8; + const u32x s = a >> 8; #if VECT_SIZE == 1 a = (u32x) crc32tab[k]; @@ -111,9 +111,9 @@ static u32 round_crc32 (u32 a, const u32 v) return a; } -static u32 crc32 (const u32 w[16], const u32 pw_len, const u32 iv) +static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) { - u32 a = iv ^ ~0; + u32x a = iv ^ ~0; if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); @@ -139,12 +139,6 @@ __kernel void m11500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 lid = get_local_id (0); - /** - * digest - */ - - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; - /** * base */ @@ -169,70 +163,52 @@ __kernel void m11500_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, const u32 pw_len = pws[gid].pw_len; + /** + * digest + */ + + const u32 iv = salt_bufs[salt_pos].salt_buf[0]; + /** * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < rules_cnt; il_pos += VECT_SIZE) { - u32 w0[4]; + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; + const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - u32 w1[4]; + append_0x01_2x4 (w0, w1, out_len); - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; + u32x w[16]; - u32 w2[4]; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; + u32x a = crc32 (w, pw_len, iv); + u32x b = 0; + u32x c = 0; + u32x d = 0; - u32 w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - - u32 w_t[16]; - - w_t[ 0] = w0[0]; - w_t[ 1] = w0[1]; - w_t[ 2] = w0[2]; - w_t[ 3] = w0[3]; - w_t[ 4] = w1[0]; - w_t[ 5] = w1[1]; - w_t[ 6] = w1[2]; - w_t[ 7] = w1[3]; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - u32 a = crc32 (w_t, out_len, iv); - u32 b = 0; - - const u32 r0 = a; - const u32 r1 = b; - const u32 r2 = 0; - const u32 r3 = 0; - - #include COMPARE_M + COMPARE_M_SIMD (a, b, c, d); } } @@ -294,66 +270,42 @@ __kernel void m11500_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < rules_cnt; il_pos += VECT_SIZE) { - u32 w0[4]; + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; + const u32 out_len = apply_rules_vect (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); - u32 w1[4]; + append_0x01_2x4 (w0, w1, out_len); - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; + u32x w[16]; - u32 w2[4]; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; + u32x a = crc32 (w, pw_len, iv); + u32x b = 0; + u32x c = 0; + u32x d = 0; - u32 w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - - u32 w_t[16]; - - w_t[ 0] = w0[0]; - w_t[ 1] = w0[1]; - w_t[ 2] = w0[2]; - w_t[ 3] = w0[3]; - w_t[ 4] = w1[0]; - w_t[ 5] = w1[1]; - w_t[ 6] = w1[2]; - w_t[ 7] = w1[3]; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - u32 a = crc32 (w_t, out_len, iv); - u32 b = 0; - - const u32 r0 = a; - const u32 r1 = b; - const u32 r2 = 0; - const u32 r3 = 0; - - #include COMPARE_S + COMPARE_S_SIMD (a, b, c, d); } }