From 7190dcf855355440502559dddf59038081ff6350 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 20 Feb 2016 16:13:06 +0100 Subject: [PATCH] Prepare NEW_SIMD_MODE for -a 1 kernels --- OpenCL/m00000_a1.cl | 152 +++++++++++++++------------------------ OpenCL/m00020_a3.cl | 4 +- OpenCL/m00040_a3.cl | 4 +- OpenCL/m00050_a3.cl | 4 +- OpenCL/m00060_a3.cl | 4 +- OpenCL/m00120_a3.cl | 4 +- OpenCL/m00140_a3.cl | 4 +- OpenCL/m00150_a3.cl | 4 +- OpenCL/m00160_a3.cl | 4 +- OpenCL/m01420_a3.cl | 4 +- OpenCL/m01440_a3.cl | 4 +- OpenCL/m01450_a3.cl | 4 +- OpenCL/m01460_a3.cl | 4 +- OpenCL/m01720_a3.cl | 4 +- OpenCL/m01740_a3.cl | 4 +- OpenCL/m01750_a3.cl | 4 +- OpenCL/m01760_a3.cl | 4 +- OpenCL/m02610_a3.cl | 4 +- OpenCL/m02710_a3.cl | 4 +- OpenCL/m02810_a3.cl | 4 +- OpenCL/m03710_a3.cl | 4 +- OpenCL/m03800_a3.cl | 4 +- OpenCL/m04310_a3.cl | 4 +- OpenCL/m04400_a3.cl | 4 +- OpenCL/m04500_a3.cl | 4 +- OpenCL/m04700_a3.cl | 4 +- OpenCL/m04800_a3.cl | 4 +- OpenCL/m04900_a3.cl | 4 +- OpenCL/m05000_a3.cl | 4 +- OpenCL/m05100_a3.cl | 4 +- OpenCL/m05300_a3.cl | 4 +- OpenCL/m05400_a3.cl | 4 +- OpenCL/m05600_a3.cl | 4 +- OpenCL/m06000_a3.cl | 4 +- OpenCL/m06100_a3.cl | 4 +- OpenCL/m06900_a3.cl | 4 +- OpenCL/m07300_a3.cl | 4 +- OpenCL/m07600_a3.cl | 4 +- OpenCL/m08100_a3.cl | 4 +- OpenCL/m08300_a3.cl | 4 +- OpenCL/m08400_a3.cl | 4 +- OpenCL/m09700_a3.cl | 4 +- OpenCL/m09710_a3.cl | 4 +- OpenCL/m09720_a3.cl | 4 +- OpenCL/m09800_a3.cl | 4 +- OpenCL/m09810_a3.cl | 4 +- OpenCL/m09820_a3.cl | 4 +- OpenCL/m10400_a3.cl | 4 +- OpenCL/m10410_a3.cl | 4 +- OpenCL/m10420_a3.cl | 4 +- OpenCL/m11000_a3.cl | 4 +- OpenCL/m11100_a3.cl | 4 +- OpenCL/m11200_a3.cl | 4 +- OpenCL/m11400_a3.cl | 16 ++--- OpenCL/m11700_a3.cl | 4 +- OpenCL/m11800_a3.cl | 4 +- OpenCL/m12600_a3.cl | 4 +- OpenCL/simd.c | 70 +++++++++++++----- src/oclHashcat.c | 171 ++++++++++++++++++++++---------------------- tools/test.sh | 2 +- 60 files changed, 313 insertions(+), 318 deletions(-) diff --git a/OpenCL/m00000_a1.cl b/OpenCL/m00000_a1.cl index 59d5e6893..6cc315aed 100644 --- a/OpenCL/m00000_a1.cl +++ b/OpenCL/m00000_a1.cl @@ -5,6 +5,8 @@ #define _MD5_ +#define NEW_SIMD_CODE + #include "include/constants.h" #include "include/kernel_vendor.h" @@ -16,9 +18,7 @@ #include "include/kernel_functions.c" #include "OpenCL/types_ocl.c" #include "OpenCL/common.c" - -#define COMPARE_S "OpenCL/check_single_comp4.c" -#define COMPARE_M "OpenCL/check_multi_comp4.c" +#include "OpenCL/simd.c" __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -68,86 +68,72 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) { - append_0x80_2x4 (wordl0, wordl1, pw_l_len); + append_0x80_2x4_S (wordl0, wordl1, pw_l_len); - switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); + switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); } /** * loop */ - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE) { - const u32 pw_r_len = combs_buf[il_pos].pw_len; + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); - const u32 pw_len = pw_l_len + pw_r_len; + const u32x pw_len = pw_l_len + pw_r_len; - u32 wordr0[4]; + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; - wordr0[0] = combs_buf[il_pos].i[0]; - wordr0[1] = combs_buf[il_pos].i[1]; - wordr0[2] = combs_buf[il_pos].i[2]; - wordr0[3] = combs_buf[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = combs_buf[il_pos].i[4]; - wordr1[1] = combs_buf[il_pos].i[5]; - wordr1[2] = combs_buf[il_pos].i[6]; - wordr1[3] = combs_buf[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); if (combs_mode == COMBINATOR_MODE_BASE_LEFT) { switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32 w0[4]; + u32x w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32 w1[4]; + u32x w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32 w2[4]; + u32x w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32 w3[4]; + u32x w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32 a = MD5M_A; - u32 b = MD5M_B; - u32 c = MD5M_C; - u32 d = MD5M_D; + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -217,12 +203,7 @@ __kernel void m00000_m04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32 r0 = a; - const u32 r1 = d; - const u32 r2 = c; - const u32 r3 = b; - - #include COMPARE_M + COMPARE_M_SIMD (a, d, c, b); } } @@ -282,9 +263,9 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) { - append_0x80_2x4 (wordl0, wordl1, pw_l_len); + append_0x80_2x4_S (wordl0, wordl1, pw_l_len); - switch_buffer_by_offset_le (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); + switch_buffer_by_offset_le_S (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); } /** @@ -303,77 +284,63 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, * loop */ - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) + for (u32 il_pos = 0; il_pos < combs_cnt; il_pos += VECT_SIZE) { - const u32 pw_r_len = combs_buf[il_pos].pw_len; + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos); - const u32 pw_len = pw_l_len + pw_r_len; + const u32x pw_len = pw_l_len + pw_r_len; - u32 wordr0[4]; + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; - wordr0[0] = combs_buf[il_pos].i[0]; - wordr0[1] = combs_buf[il_pos].i[1]; - wordr0[2] = combs_buf[il_pos].i[2]; - wordr0[3] = combs_buf[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = combs_buf[il_pos].i[4]; - wordr1[1] = combs_buf[il_pos].i[5]; - wordr1[2] = combs_buf[il_pos].i[6]; - wordr1[3] = combs_buf[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); if (combs_mode == COMBINATOR_MODE_BASE_LEFT) { switch_buffer_by_offset_le (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32 w0[4]; + u32x w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32 w1[4]; + u32x w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32 w2[4]; + u32x w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32 w3[4]; + u32x w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32 a = MD5M_A; - u32 b = MD5M_B; - u32 c = MD5M_C; - u32 d = MD5M_D; + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -440,20 +407,13 @@ __kernel void m00000_s04 (__global pw_t *pws, __global kernel_rule_t *rules_buf, MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - bool q_cond = allx (search[0] != a); - - if (q_cond) continue; + if (MATCHES_NONE_VS (a, search[0])) continue; MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32 r0 = a; - const u32 r1 = d; - const u32 r2 = c; - const u32 r3 = b; - - #include COMPARE_S + COMPARE_S_SIMD (a, d, c, b); } } diff --git a/OpenCL/m00020_a3.cl b/OpenCL/m00020_a3.cl index 7cb88eef7..d74fbd79c 100644 --- a/OpenCL/m00020_a3.cl +++ b/OpenCL/m00020_a3.cl @@ -118,7 +118,7 @@ static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -356,7 +356,7 @@ static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m00040_a3.cl b/OpenCL/m00040_a3.cl index 3ea81d98e..a60c6d467 100644 --- a/OpenCL/m00040_a3.cl +++ b/OpenCL/m00040_a3.cl @@ -118,7 +118,7 @@ static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -356,7 +356,7 @@ static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m00050_a3.cl b/OpenCL/m00050_a3.cl index 13a751b89..a954bf4ad 100644 --- a/OpenCL/m00050_a3.cl +++ b/OpenCL/m00050_a3.cl @@ -254,7 +254,7 @@ static void m00050m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -383,7 +383,7 @@ static void m00050s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m00060_a3.cl b/OpenCL/m00060_a3.cl index ee4811d27..3d5419c4f 100644 --- a/OpenCL/m00060_a3.cl +++ b/OpenCL/m00060_a3.cl @@ -275,7 +275,7 @@ static void m00060m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0rl = w0r | w0l; @@ -390,7 +390,7 @@ static void m00060s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0rl = w0r | w0l; diff --git a/OpenCL/m00120_a3.cl b/OpenCL/m00120_a3.cl index de239dbe1..4993666a6 100644 --- a/OpenCL/m00120_a3.cl +++ b/OpenCL/m00120_a3.cl @@ -135,7 +135,7 @@ static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -425,7 +425,7 @@ static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m00140_a3.cl b/OpenCL/m00140_a3.cl index 12b380c89..a5db7daad 100644 --- a/OpenCL/m00140_a3.cl +++ b/OpenCL/m00140_a3.cl @@ -135,7 +135,7 @@ static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -425,7 +425,7 @@ static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m00150_a3.cl b/OpenCL/m00150_a3.cl index 219e3cd7a..860af0d0e 100644 --- a/OpenCL/m00150_a3.cl +++ b/OpenCL/m00150_a3.cl @@ -274,7 +274,7 @@ static void m00150m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -389,7 +389,7 @@ static void m00150s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m00160_a3.cl b/OpenCL/m00160_a3.cl index 126927be8..7cc590202 100644 --- a/OpenCL/m00160_a3.cl +++ b/OpenCL/m00160_a3.cl @@ -309,7 +309,7 @@ static void m00160m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -422,7 +422,7 @@ static void m00160s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01420_a3.cl b/OpenCL/m01420_a3.cl index 9250b0216..3e194c285 100644 --- a/OpenCL/m01420_a3.cl +++ b/OpenCL/m01420_a3.cl @@ -135,7 +135,7 @@ static void m01420m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -385,7 +385,7 @@ static void m01420s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01440_a3.cl b/OpenCL/m01440_a3.cl index b8289d0a9..64214ef97 100644 --- a/OpenCL/m01440_a3.cl +++ b/OpenCL/m01440_a3.cl @@ -135,7 +135,7 @@ static void m01440m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -385,7 +385,7 @@ static void m01440s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01450_a3.cl b/OpenCL/m01450_a3.cl index 683f1dd95..9f702dd79 100644 --- a/OpenCL/m01450_a3.cl +++ b/OpenCL/m01450_a3.cl @@ -264,7 +264,7 @@ static void m01450m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -379,7 +379,7 @@ static void m01450s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01460_a3.cl b/OpenCL/m01460_a3.cl index d2406572f..5130663ad 100644 --- a/OpenCL/m01460_a3.cl +++ b/OpenCL/m01460_a3.cl @@ -299,7 +299,7 @@ static void m01460m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -412,7 +412,7 @@ static void m01460s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01720_a3.cl b/OpenCL/m01720_a3.cl index cd667943a..8d0eb13d9 100644 --- a/OpenCL/m01720_a3.cl +++ b/OpenCL/m01720_a3.cl @@ -257,7 +257,7 @@ static void m01720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -457,7 +457,7 @@ static void m01720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01740_a3.cl b/OpenCL/m01740_a3.cl index 857095d93..2af4e3b42 100644 --- a/OpenCL/m01740_a3.cl +++ b/OpenCL/m01740_a3.cl @@ -257,7 +257,7 @@ static void m01740m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -457,7 +457,7 @@ static void m01740s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01750_a3.cl b/OpenCL/m01750_a3.cl index 879248fe5..c1b92e577 100644 --- a/OpenCL/m01750_a3.cl +++ b/OpenCL/m01750_a3.cl @@ -295,7 +295,7 @@ static void m01750m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -415,7 +415,7 @@ static void m01750s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m01760_a3.cl b/OpenCL/m01760_a3.cl index 461a733d6..fe9e80906 100644 --- a/OpenCL/m01760_a3.cl +++ b/OpenCL/m01760_a3.cl @@ -330,7 +330,7 @@ static void m01760m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -448,7 +448,7 @@ static void m01760s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m02610_a3.cl b/OpenCL/m02610_a3.cl index 0fdd37f68..f6894f8db 100644 --- a/OpenCL/m02610_a3.cl +++ b/OpenCL/m02610_a3.cl @@ -69,7 +69,7 @@ static void m02610m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -329,7 +329,7 @@ static void m02610s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m02710_a3.cl b/OpenCL/m02710_a3.cl index d943ba2ee..988a8702d 100644 --- a/OpenCL/m02710_a3.cl +++ b/OpenCL/m02710_a3.cl @@ -71,7 +71,7 @@ static void m02710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -416,7 +416,7 @@ static void m02710s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m02810_a3.cl b/OpenCL/m02810_a3.cl index 467be23dc..892f9ad41 100644 --- a/OpenCL/m02810_a3.cl +++ b/OpenCL/m02810_a3.cl @@ -70,7 +70,7 @@ static void m02810m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -414,7 +414,7 @@ static void m02810s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m03710_a3.cl b/OpenCL/m03710_a3.cl index 17ebf2f2a..26f8f6791 100644 --- a/OpenCL/m03710_a3.cl +++ b/OpenCL/m03710_a3.cl @@ -88,7 +88,7 @@ static void m03710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -396,7 +396,7 @@ static void m03710s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m03800_a3.cl b/OpenCL/m03800_a3.cl index 502295c96..1bc3c9aa3 100644 --- a/OpenCL/m03800_a3.cl +++ b/OpenCL/m03800_a3.cl @@ -77,7 +77,7 @@ static void m03800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -335,7 +335,7 @@ static void m03800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m04310_a3.cl b/OpenCL/m04310_a3.cl index 0ef9d7e1c..8b2e39615 100644 --- a/OpenCL/m04310_a3.cl +++ b/OpenCL/m04310_a3.cl @@ -68,7 +68,7 @@ static void m04310m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -328,7 +328,7 @@ static void m04310s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m04400_a3.cl b/OpenCL/m04400_a3.cl index 94d00fc0a..94f194ed8 100644 --- a/OpenCL/m04400_a3.cl +++ b/OpenCL/m04400_a3.cl @@ -55,7 +55,7 @@ static void m04400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -326,7 +326,7 @@ static void m04400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m04500_a3.cl b/OpenCL/m04500_a3.cl index 65f2bf900..3fe5d4fe1 100644 --- a/OpenCL/m04500_a3.cl +++ b/OpenCL/m04500_a3.cl @@ -51,7 +51,7 @@ static void m04500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -357,7 +357,7 @@ static void m04500s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m04700_a3.cl b/OpenCL/m04700_a3.cl index 3b5358a60..bc78b9b4c 100644 --- a/OpenCL/m04700_a3.cl +++ b/OpenCL/m04700_a3.cl @@ -52,7 +52,7 @@ static void m04700m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -340,7 +340,7 @@ static void m04700s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m04800_a3.cl b/OpenCL/m04800_a3.cl index 6bf5ce025..502cb6ada 100644 --- a/OpenCL/m04800_a3.cl +++ b/OpenCL/m04800_a3.cl @@ -142,7 +142,7 @@ static void m04800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); w0_t[0] = w0l | (w0r << 8); w0_t[1] = w1l | (w0r >> 24); @@ -362,7 +362,7 @@ static void m04800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); w0_t[0] = w0l | (w0r << 8); w0_t[1] = w1l | (w0r >> 24); diff --git a/OpenCL/m04900_a3.cl b/OpenCL/m04900_a3.cl index 69e1fdafc..5da3aee08 100644 --- a/OpenCL/m04900_a3.cl +++ b/OpenCL/m04900_a3.cl @@ -129,7 +129,7 @@ static void m04900m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -437,7 +437,7 @@ static void m04900s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m05000_a3.cl b/OpenCL/m05000_a3.cl index eff5dc319..5dd98457a 100644 --- a/OpenCL/m05000_a3.cl +++ b/OpenCL/m05000_a3.cl @@ -113,7 +113,7 @@ static void m05000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -273,7 +273,7 @@ static void m05000s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m05100_a3.cl b/OpenCL/m05100_a3.cl index 4de1ac840..8f60352e0 100644 --- a/OpenCL/m05100_a3.cl +++ b/OpenCL/m05100_a3.cl @@ -43,7 +43,7 @@ static void m05100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -171,7 +171,7 @@ static void m05100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m05300_a3.cl b/OpenCL/m05300_a3.cl index 705fcd00f..d4de819e3 100644 --- a/OpenCL/m05300_a3.cl +++ b/OpenCL/m05300_a3.cl @@ -229,7 +229,7 @@ static void m05300m (__local u32 *w_s, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -394,7 +394,7 @@ static void m05300s (__local u32 *w_s, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m05400_a3.cl b/OpenCL/m05400_a3.cl index 213bc3696..14f5ab119 100644 --- a/OpenCL/m05400_a3.cl +++ b/OpenCL/m05400_a3.cl @@ -263,7 +263,7 @@ static void m05400m (__local u32 *w_s, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -428,7 +428,7 @@ static void m05400s (__local u32 *w_s, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4 for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m05600_a3.cl b/OpenCL/m05600_a3.cl index f2ee60866..acf686d80 100644 --- a/OpenCL/m05600_a3.cl +++ b/OpenCL/m05600_a3.cl @@ -313,7 +313,7 @@ static void m05600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -530,7 +530,7 @@ static void m05600s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m06000_a3.cl b/OpenCL/m06000_a3.cl index 689a7716c..2f40a955a 100644 --- a/OpenCL/m06000_a3.cl +++ b/OpenCL/m06000_a3.cl @@ -240,7 +240,7 @@ static void m06000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -312,7 +312,7 @@ static void m06000s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m06100_a3.cl b/OpenCL/m06100_a3.cl index 6e5399a8f..fbacce55b 100644 --- a/OpenCL/m06100_a3.cl +++ b/OpenCL/m06100_a3.cl @@ -1363,7 +1363,7 @@ static void m06100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -1423,7 +1423,7 @@ static void m06100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m06900_a3.cl b/OpenCL/m06900_a3.cl index 5d1fef8b2..9ce38b323 100644 --- a/OpenCL/m06900_a3.cl +++ b/OpenCL/m06900_a3.cl @@ -729,7 +729,7 @@ static void m06900m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -917,7 +917,7 @@ static void m06900s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m07300_a3.cl b/OpenCL/m07300_a3.cl index bea9dbe09..0044cc5de 100644 --- a/OpenCL/m07300_a3.cl +++ b/OpenCL/m07300_a3.cl @@ -256,7 +256,7 @@ static void m07300m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -380,7 +380,7 @@ static void m07300s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m07600_a3.cl b/OpenCL/m07600_a3.cl index 01a461de1..4fe62fe35 100644 --- a/OpenCL/m07600_a3.cl +++ b/OpenCL/m07600_a3.cl @@ -86,7 +86,7 @@ static void m07600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -594,7 +594,7 @@ static void m07600s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m08100_a3.cl b/OpenCL/m08100_a3.cl index 0dac6eab7..90adec836 100644 --- a/OpenCL/m08100_a3.cl +++ b/OpenCL/m08100_a3.cl @@ -50,7 +50,7 @@ static void m08100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -238,7 +238,7 @@ static void m08100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m08300_a3.cl b/OpenCL/m08300_a3.cl index 051c90576..bbe692190 100644 --- a/OpenCL/m08300_a3.cl +++ b/OpenCL/m08300_a3.cl @@ -267,7 +267,7 @@ static void m08300m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -521,7 +521,7 @@ static void m08300s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m08400_a3.cl b/OpenCL/m08400_a3.cl index a1cae8cde..fb0f00de0 100644 --- a/OpenCL/m08400_a3.cl +++ b/OpenCL/m08400_a3.cl @@ -207,7 +207,7 @@ static void m08400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -435,7 +435,7 @@ static void m08400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m09700_a3.cl b/OpenCL/m09700_a3.cl index 060bd9c23..e13d0d906 100644 --- a/OpenCL/m09700_a3.cl +++ b/OpenCL/m09700_a3.cl @@ -303,7 +303,7 @@ static void m09700m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; @@ -806,7 +806,7 @@ static void m09700s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; diff --git a/OpenCL/m09710_a3.cl b/OpenCL/m09710_a3.cl index e33049e0a..096bc3aca 100644 --- a/OpenCL/m09710_a3.cl +++ b/OpenCL/m09710_a3.cl @@ -269,7 +269,7 @@ static void m09710m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; @@ -395,7 +395,7 @@ static void m09710s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; diff --git a/OpenCL/m09720_a3.cl b/OpenCL/m09720_a3.cl index 788214ff0..0fe2f8149 100644 --- a/OpenCL/m09720_a3.cl +++ b/OpenCL/m09720_a3.cl @@ -490,7 +490,7 @@ static void m09720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -588,7 +588,7 @@ static void m09720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m09800_a3.cl b/OpenCL/m09800_a3.cl index c2b741e03..e0586d409 100644 --- a/OpenCL/m09800_a3.cl +++ b/OpenCL/m09800_a3.cl @@ -314,7 +314,7 @@ static void m09800m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; @@ -490,7 +490,7 @@ static void m09800s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; diff --git a/OpenCL/m09810_a3.cl b/OpenCL/m09810_a3.cl index d04959494..34d2e325b 100644 --- a/OpenCL/m09810_a3.cl +++ b/OpenCL/m09810_a3.cl @@ -299,7 +299,7 @@ static void m09810m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; @@ -403,7 +403,7 @@ static void m09810s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; diff --git a/OpenCL/m09820_a3.cl b/OpenCL/m09820_a3.cl index bd56b0a43..3cf8d70a9 100644 --- a/OpenCL/m09820_a3.cl +++ b/OpenCL/m09820_a3.cl @@ -180,7 +180,7 @@ static void m09820m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -294,7 +294,7 @@ static void m09820s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m10400_a3.cl b/OpenCL/m10400_a3.cl index a8822733d..1d31f17df 100644 --- a/OpenCL/m10400_a3.cl +++ b/OpenCL/m10400_a3.cl @@ -275,7 +275,7 @@ static void m10400m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); w0[0] = w0l | w0r; @@ -428,7 +428,7 @@ static void m10400s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); w0[0] = w0l | w0r; diff --git a/OpenCL/m10410_a3.cl b/OpenCL/m10410_a3.cl index b04d7923e..41223c2af 100644 --- a/OpenCL/m10410_a3.cl +++ b/OpenCL/m10410_a3.cl @@ -152,7 +152,7 @@ static void m10410m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; @@ -206,7 +206,7 @@ static void m10410s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4], for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32 w0r = w0r_create_bft (bfs_buf, il_pos); + const u32 w0r = ix_create_bft (bfs_buf, il_pos); const u32 w0lr = w0l | w0r; diff --git a/OpenCL/m10420_a3.cl b/OpenCL/m10420_a3.cl index 3e4191c8c..af2a6fb6c 100644 --- a/OpenCL/m10420_a3.cl +++ b/OpenCL/m10420_a3.cl @@ -171,7 +171,7 @@ static void m10420m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -312,7 +312,7 @@ static void m10420s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m11000_a3.cl b/OpenCL/m11000_a3.cl index e2fb91f21..9e3dfb5a0 100644 --- a/OpenCL/m11000_a3.cl +++ b/OpenCL/m11000_a3.cl @@ -71,7 +71,7 @@ static void m11000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -354,7 +354,7 @@ static void m11000s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m11100_a3.cl b/OpenCL/m11100_a3.cl index 4a27ef151..39bc2840b 100644 --- a/OpenCL/m11100_a3.cl +++ b/OpenCL/m11100_a3.cl @@ -98,7 +98,7 @@ static void m11100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -397,7 +397,7 @@ static void m11100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m11200_a3.cl b/OpenCL/m11200_a3.cl index 8f110bae9..a626126a9 100644 --- a/OpenCL/m11200_a3.cl +++ b/OpenCL/m11200_a3.cl @@ -49,7 +49,7 @@ static void m11200m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -492,7 +492,7 @@ static void m11200s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m11400_a3.cl b/OpenCL/m11400_a3.cl index 87d9511ee..e1a780cc2 100644 --- a/OpenCL/m11400_a3.cl +++ b/OpenCL/m11400_a3.cl @@ -877,7 +877,7 @@ static void m11400m_0_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -1409,7 +1409,7 @@ static void m11400m_0_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -2022,7 +2022,7 @@ static void m11400m_1_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -2652,7 +2652,7 @@ static void m11400m_1_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -3363,7 +3363,7 @@ static void m11400s_0_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -3895,7 +3895,7 @@ static void m11400s_0_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -4508,7 +4508,7 @@ static void m11400s_1_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -5138,7 +5138,7 @@ static void m11400s_1_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 p for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m11700_a3.cl b/OpenCL/m11700_a3.cl index dc50cc26e..e152b004d 100644 --- a/OpenCL/m11700_a3.cl +++ b/OpenCL/m11700_a3.cl @@ -2314,7 +2314,7 @@ static void m11700m (__local u64 (*s_sbob_sl64)[256], u32 w[16], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -2409,7 +2409,7 @@ static void m11700s (__local u64 (*s_sbob_sl64)[256], u32 w[16], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m11800_a3.cl b/OpenCL/m11800_a3.cl index 0f61a7224..78b2fe77a 100644 --- a/OpenCL/m11800_a3.cl +++ b/OpenCL/m11800_a3.cl @@ -2314,7 +2314,7 @@ static void m11800m (__local u64 (*s_sbob_sl64)[256], u32 w[16], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -2409,7 +2409,7 @@ static void m11800s (__local u64 (*s_sbob_sl64)[256], u32 w[16], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/m12600_a3.cl b/OpenCL/m12600_a3.cl index fdac3adc6..af8368070 100644 --- a/OpenCL/m12600_a3.cl +++ b/OpenCL/m12600_a3.cl @@ -67,7 +67,7 @@ static void m12600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; @@ -370,7 +370,7 @@ static void m12600s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_le for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos += VECT_SIZE) { - const u32x w0r = w0r_create_bft (bfs_buf, il_pos); + const u32x w0r = ix_create_bft (bfs_buf, il_pos); const u32x w0lr = w0l | w0r; diff --git a/OpenCL/simd.c b/OpenCL/simd.c index b520aefd1..c39863b06 100644 --- a/OpenCL/simd.c +++ b/OpenCL/simd.c @@ -1171,43 +1171,79 @@ // attack-mode 0 -static inline u32x w0r_create_bft (__global bf_t *bfs_buf, const u32 il_pos) +static inline u32x ix_create_bft (__global bf_t *bfs_buf, const u32 il_pos) { #if VECT_SIZE == 1 - const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i); + const u32x ix = (u32x) (bfs_buf[il_pos + 0].i); #elif VECT_SIZE == 2 - const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i); + const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i); #elif VECT_SIZE == 4 - const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i); + const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i); #elif VECT_SIZE == 8 - const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i); + const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i); #elif VECT_SIZE == 16 - const u32x w0r = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i); + const u32x ix = (u32x) (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i); #endif - return w0r; + return ix; +} + +// attack-mode 1 + +static inline u32x pwlenx_create_combt (__global comb_t *combs_buf, const u32 il_pos) +{ + #if VECT_SIZE == 1 + const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len); + #elif VECT_SIZE == 2 + const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len); + #elif VECT_SIZE == 4 + const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len); + #elif VECT_SIZE == 8 + const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len); + #elif VECT_SIZE == 16 + const u32x pw_lenx = (u32x) (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len); + #endif + + return pw_lenx; +} + +static inline u32x ix_create_combt (__global comb_t *combs_buf, const u32 il_pos, const int idx) +{ + #if VECT_SIZE == 1 + const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx]); + #elif VECT_SIZE == 2 + const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]); + #elif VECT_SIZE == 4 + const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]); + #elif VECT_SIZE == 8 + const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]); + #elif VECT_SIZE == 16 + const u32x ix = (u32x) (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]); + #endif + + return ix; } #if VECT_SIZE == 1 -#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 1) + 0].var[(idx)]) +#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 1) + 0].var[(idx)]) #elif VECT_SIZE == 2 -#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 2) + 0].var[(idx)], (arr)[((gid) * 2) + 1].var[(idx)]) +#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 2) + 0].var[(idx)], (arr)[((gid) * 2) + 1].var[(idx)]) #elif VECT_SIZE == 4 -#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 4) + 0].var[(idx)], (arr)[((gid) * 4) + 1].var[(idx)], (arr)[((gid) * 4) + 2].var[(idx)], (arr)[((gid) * 4) + 3].var[(idx)]) +#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 4) + 0].var[(idx)], (arr)[((gid) * 4) + 1].var[(idx)], (arr)[((gid) * 4) + 2].var[(idx)], (arr)[((gid) * 4) + 3].var[(idx)]) #elif VECT_SIZE == 8 -#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)]) +#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)]) #elif VECT_SIZE == 16 -#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 8) + 0].var[(idx)], (arr)[((gid) * 8) + 1].var[(idx)], (arr)[((gid) * 8) + 2].var[(idx)], (arr)[((gid) * 8) + 3].var[(idx)], (arr)[((gid) * 8) + 4].var[(idx)], (arr)[((gid) * 8) + 5].var[(idx)], (arr)[((gid) * 8) + 6].var[(idx)], (arr)[((gid) * 8) + 7].var[(idx)], (arr)[((gid) * 8) + 8].var[(idx)], (arr)[((gid) * 8) + 9].var[(idx)], (arr)[((gid) * 8) + 10].var[(idx)], (arr)[((gid) * 8) + 11].var[(idx)], (arr)[((gid) * 8) + 12].var[(idx)], (arr)[((gid) * 8) + 13].var[(idx)], (arr)[((gid) * 8) + 14].var[(idx)], (arr)[((gid) * 8) + 15].var[(idx)]) +#define packv(arr,var,gid,idx) (u32x) ((arr)[((gid) * 16) + 0].var[(idx)], (arr)[((gid) * 16) + 1].var[(idx)], (arr)[((gid) * 16) + 2].var[(idx)], (arr)[((gid) * 16) + 3].var[(idx)], (arr)[((gid) * 16) + 4].var[(idx)], (arr)[((gid) * 16) + 5].var[(idx)], (arr)[((gid) * 16) + 6].var[(idx)], (arr)[((gid) * 16) + 7].var[(idx)], (arr)[((gid) * 16) + 8].var[(idx)], (arr)[((gid) * 16) + 9].var[(idx)], (arr)[((gid) * 16) + 10].var[(idx)], (arr)[((gid) * 16) + 11].var[(idx)], (arr)[((gid) * 16) + 12].var[(idx)], (arr)[((gid) * 16) + 13].var[(idx)], (arr)[((gid) * 16) + 14].var[(idx)], (arr)[((gid) * 16) + 15].var[(idx)]) #endif #if VECT_SIZE == 1 -#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 1) + 0].var[(idx)] = val; +#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 1) + 0].var[(idx)] = val; #elif VECT_SIZE == 2 -#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 2) + 0].var[(idx)] = val.s0; (arr)[((gid) * 2) + 1].var[(idx)] = val.s1; +#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 2) + 0].var[(idx)] = val.s0; (arr)[((gid) * 2) + 1].var[(idx)] = val.s1; #elif VECT_SIZE == 4 -#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 4) + 0].var[(idx)] = val.s0; (arr)[((gid) * 4) + 1].var[(idx)] = val.s1; (arr)[((gid) * 4) + 2].var[(idx)] = val.s2; (arr)[((gid) * 4) + 3].var[(idx)] = val.s3; +#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 4) + 0].var[(idx)] = val.s0; (arr)[((gid) * 4) + 1].var[(idx)] = val.s1; (arr)[((gid) * 4) + 2].var[(idx)] = val.s2; (arr)[((gid) * 4) + 3].var[(idx)] = val.s3; #elif VECT_SIZE == 8 -#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 8) + 0].var[(idx)] = val.s0; (arr)[((gid) * 8) + 1].var[(idx)] = val.s1; (arr)[((gid) * 8) + 2].var[(idx)] = val.s2; (arr)[((gid) * 8) + 3].var[(idx)] = val.s3; (arr)[((gid) * 8) + 4].var[(idx)] = val.s4; (arr)[((gid) * 8) + 5].var[(idx)] = val.s5; (arr)[((gid) * 8) + 6].var[(idx)] = val.s6; (arr)[((gid) * 8) + 7].var[(idx)] = val.s7; +#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 8) + 0].var[(idx)] = val.s0; (arr)[((gid) * 8) + 1].var[(idx)] = val.s1; (arr)[((gid) * 8) + 2].var[(idx)] = val.s2; (arr)[((gid) * 8) + 3].var[(idx)] = val.s3; (arr)[((gid) * 8) + 4].var[(idx)] = val.s4; (arr)[((gid) * 8) + 5].var[(idx)] = val.s5; (arr)[((gid) * 8) + 6].var[(idx)] = val.s6; (arr)[((gid) * 8) + 7].var[(idx)] = val.s7; #elif VECT_SIZE == 16 -#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 8) + 0].var[(idx)] = val.s0; (arr)[((gid) * 8) + 1].var[(idx)] = val.s1; (arr)[((gid) * 8) + 2].var[(idx)] = val.s2; (arr)[((gid) * 8) + 3].var[(idx)] = val.s3; (arr)[((gid) * 8) + 4].var[(idx)] = val.s4; (arr)[((gid) * 8) + 5].var[(idx)] = val.s5; (arr)[((gid) * 8) + 6].var[(idx)] = val.s6; (arr)[((gid) * 8) + 7].var[(idx)] = val.s7; (arr)[((gid) * 8) + 8].var[(idx)] = val.s8; (arr)[((gid) * 8) + 9].var[(idx)] = val.s9; (arr)[((gid) * 8) + 10].var[(idx)] = val.sa; (arr)[((gid) * 8) + 11].var[(idx)] = val.sb; (arr)[((gid) * 8) + 12].var[(idx)] = val.sc; (arr)[((gid) * 8) + 13].var[(idx)] = val.sd; (arr)[((gid) * 8) + 14].var[(idx)] = val.se; (arr)[((gid) * 8) + 15].var[(idx)] = val.sf; +#define unpackv(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] = val.s0; (arr)[((gid) * 16) + 1].var[(idx)] = val.s1; (arr)[((gid) * 16) + 2].var[(idx)] = val.s2; (arr)[((gid) * 16) + 3].var[(idx)] = val.s3; (arr)[((gid) * 16) + 4].var[(idx)] = val.s4; (arr)[((gid) * 16) + 5].var[(idx)] = val.s5; (arr)[((gid) * 16) + 6].var[(idx)] = val.s6; (arr)[((gid) * 16) + 7].var[(idx)] = val.s7; (arr)[((gid) * 16) + 8].var[(idx)] = val.s8; (arr)[((gid) * 16) + 9].var[(idx)] = val.s9; (arr)[((gid) * 16) + 10].var[(idx)] = val.sa; (arr)[((gid) * 16) + 11].var[(idx)] = val.sb; (arr)[((gid) * 16) + 12].var[(idx)] = val.sc; (arr)[((gid) * 16) + 13].var[(idx)] = val.sd; (arr)[((gid) * 16) + 14].var[(idx)] = val.se; (arr)[((gid) * 16) + 15].var[(idx)] = val.sf; #endif diff --git a/src/oclHashcat.c b/src/oclHashcat.c index 9c54529f7..996baeb2c 100644 --- a/src/oclHashcat.c +++ b/src/oclHashcat.c @@ -220,12 +220,12 @@ static uint default_benchmark_algorithms[NUM_DEFAULT_BENCHMARK_ALGORITHMS] = 101, 111, 1711, - 3000, // broken in osx + 3000, 1000, 1100, 2100, 12800, - 1500, // broken in osx + 1500, 12400, 500, 3200, @@ -278,7 +278,7 @@ static uint default_benchmark_algorithms[NUM_DEFAULT_BENCHMARK_ALGORITHMS] = 10410, 10500, 10600, - 10700, // broken in osx + 10700, 9000, 5200, 6800, @@ -2877,7 +2877,7 @@ static void autotune (hc_device_param_t *device_param) u32 steps_accel[STEPS_ACCEL_CNT]; u32 steps_loops[STEPS_LOOPS_CNT]; - for (int i = 1; i < STEPS_ACCEL_CNT; i++) + for (int i = 0; i < STEPS_ACCEL_CNT; i++) { steps_accel[i] = 1 << i; } @@ -2900,62 +2900,66 @@ static void autotune (hc_device_param_t *device_param) u32 kernel_loops_tmp; - for (kernel_loops_tmp = kernel_loops_max; kernel_loops_tmp >= kernel_loops_min; kernel_loops_tmp >>= 1) + for (kernel_loops_tmp = kernel_loops_max; kernel_loops_tmp > kernel_loops_min; kernel_loops_tmp >>= 1) { const double exec_ms = try_run (device_param, kernel_accel_min, kernel_loops_tmp, 1); if (exec_ms < target_ms) break; - - if (kernel_loops_tmp == kernel_loops_min) break; } // kernel-accel - double e_best = 0; - - for (int i = 0; i < STEPS_ACCEL_CNT; i++) + if (kernel_accel_min < kernel_accel_max) { - const u32 kernel_accel_try = steps_accel[i]; + double e_best = 0; - if (kernel_accel_try < kernel_accel_min) continue; - if (kernel_accel_try > kernel_accel_max) break; - - const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_tmp, 1); - - if (exec_ms > target_ms) break; - - const double e = kernel_accel_try / exec_ms; - - if (e > e_best) + for (int i = 0; i < STEPS_ACCEL_CNT; i++) { - kernel_accel = kernel_accel_try; + const u32 kernel_accel_try = steps_accel[i]; - e_best = e; + if (kernel_accel_try < kernel_accel_min) continue; + if (kernel_accel_try > kernel_accel_max) break; + + const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_tmp, 1); + + if (exec_ms > target_ms) break; + + const double e = kernel_accel_try / exec_ms; + + if (e > e_best) + { + kernel_accel = kernel_accel_try; + + e_best = e; + } } } // kernel-loops final - e_best = 0; - - for (int i = 0; i < STEPS_LOOPS_CNT; i++) + if (kernel_loops_min < kernel_loops_max) { - const u32 kernel_loops_try = steps_loops[i]; + double e_best = 0; - if (kernel_loops_try < kernel_loops_min) continue; - if (kernel_loops_try > kernel_loops_max) break; - - const double exec_ms = try_run (device_param, kernel_accel, kernel_loops_try, 1); - - if (exec_ms > target_ms) break; - - const double e = kernel_loops_try / exec_ms; - - if (e > e_best) + for (int i = 0; i < STEPS_LOOPS_CNT; i++) { - kernel_loops = kernel_loops_try; + const u32 kernel_loops_try = steps_loops[i]; - e_best = e; + if (kernel_loops_try < kernel_loops_min) continue; + if (kernel_loops_try > kernel_loops_max) break; + + const double exec_ms = try_run (device_param, kernel_accel, kernel_loops_try, 1); + + if (exec_ms > target_ms) break; + + const double e = kernel_loops_try / exec_ms; + + if (e > e_best) + { + kernel_loops = kernel_loops_try; + + e_best = e; + } } } @@ -2970,54 +2974,64 @@ static void autotune (hc_device_param_t *device_param) // reset - u32 kernel_accel_try = kernel_accel; - u32 kernel_loops_try = kernel_loops; - - for (int i = 0; i < 2; i++) + if (kernel_accel_min < kernel_accel_max) { - kernel_accel_try >>= 1; - kernel_loops_try <<= 1; + u32 kernel_accel_try = kernel_accel; + u32 kernel_loops_try = kernel_loops; - if (kernel_accel_try < kernel_accel_min) break; - if (kernel_loops_try > kernel_loops_max) break; - - const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1); - - if (exec_ms < exec_best) + for (int i = 0; i < 2; i++) { - kernel_accel_best = kernel_accel_try; - kernel_loops_best = kernel_loops_try; + kernel_accel_try >>= 1; + kernel_loops_try <<= 1; - exec_best = exec_ms; + if (kernel_accel_try < kernel_accel_min) break; + if (kernel_loops_try > kernel_loops_max) break; + + const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1); + + if (exec_ms < exec_best) + { + kernel_accel_best = kernel_accel_try; + kernel_loops_best = kernel_loops_try; + + exec_best = exec_ms; + } } + + kernel_accel = kernel_accel_best; + kernel_loops = kernel_loops_best; } // reset - kernel_accel_try = kernel_accel; - kernel_loops_try = kernel_loops; - for (int i = 0; i < 2; i++) + if (kernel_loops_min < kernel_loops_max) { - kernel_accel_try <<= 1; - kernel_loops_try >>= 1; + u32 kernel_accel_try = kernel_accel; + u32 kernel_loops_try = kernel_loops; - if (kernel_accel_try > kernel_accel_max) break; - if (kernel_loops_try < kernel_loops_min) break; - - const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1); - - if (exec_ms < exec_best) + for (int i = 0; i < 2; i++) { - kernel_accel_best = kernel_accel_try; - kernel_loops_best = kernel_loops_try; + kernel_accel_try <<= 1; + kernel_loops_try >>= 1; - exec_best = exec_ms; + if (kernel_accel_try > kernel_accel_max) break; + if (kernel_loops_try < kernel_loops_min) break; + + const double exec_ms = try_run (device_param, kernel_accel_try, kernel_loops_try, 1); + + if (exec_ms < exec_best) + { + kernel_accel_best = kernel_accel_try; + kernel_loops_best = kernel_loops_try; + + exec_best = exec_ms; + } } - } - kernel_accel = kernel_accel_best; - kernel_loops = kernel_loops_best; + kernel_accel = kernel_accel_best; + kernel_loops = kernel_loops_best; + } // reset timer @@ -13431,21 +13445,6 @@ int main (int argc, char **argv) #endif // HAVE_ADK #endif // HAVE_HWMON - #ifdef OSX - if (hash_mode == 3000 || hash_mode == 1500 || hash_mode == 10700) - { - if (force == 0) - { - log_info (""); - log_info ("Warning: Hash mode %d is not stable with OSX.", hash_mode); - log_info ("You can use --force to override this but do not post error reports if you do so"); - log_info (""); - - continue; - } - } - #endif - #ifdef DEBUG if (benchmark == 1) log_info ("Hashmode: %d", data.hash_mode); #endif diff --git a/tools/test.sh b/tools/test.sh index 3a0577e35..cfbbff77b 100755 --- a/tools/test.sh +++ b/tools/test.sh @@ -25,7 +25,7 @@ NEVER_CRACK="11600" SLOW_ALGOS="400 500 501 1600 1800 2100 2500 3200 5200 5800 6211 6221 6231 6241 6251 6261 6271 6281 6300 6400 6500 6600 6700 6800 7100 7200 7400 7900 8200 8800 8900 9000 9100 9200 9300 9400 9500 9600 10000 10300 10500 10700 10900 11300 11600 11900 12000 12100 12200 12300 12400 12500 12800 12900 13000" -OPTS="--quiet --force --potfile-disable --runtime 200 --gpu-temp-disable --weak-hash-threshold=0" +OPTS="--quiet --force --potfile-disable --runtime 200 --gpu-temp-disable --weak-hash-threshold=0 -u 1024 -n 128" OUTD="test_$(date +%s)"