From 7832c544520fb24cf7c9da5e3566864b85e16e76 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 11 May 2019 09:32:16 +0200 Subject: [PATCH] Fix constant memory use of bfs_buf --- OpenCL/inc_common.h | 38 +++++++++++++++++++------------------- OpenCL/inc_platform.cl | 3 +++ OpenCL/inc_simd.cl | 36 ++++++++++++++++++------------------ OpenCL/inc_simd.h | 6 +++--- 4 files changed, 43 insertions(+), 40 deletions(-) diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index e323d0e0a..58a6bbeb5 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -110,27 +110,27 @@ */ #ifdef IS_CUDA -#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void) -#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *g_words_buf_s, void, void, void) -#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e) +#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, void) +#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bs_word_t *g_words_buf_s, void, void, void) +#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, e) +#define KERN_ATTR_RULES() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, void) +#define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, e) +#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, t, void, void) +#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, t, void, e) +#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, t, h, void) +#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const u32x *g_words_buf_r, void, void, void) +#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const u32x *g_words_buf_r, void, void, e) +#else +#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, void, void, void) +#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *words_buf_s, void, void, void) +#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, void, void, e) #define KERN_ATTR_RULES() KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void) #define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e) -#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, void) -#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, e) -#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, h, void) -#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *g_words_buf_r, void, void, void) -#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *g_words_buf_r, void, void, e) -#else -#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void) -#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *words_buf_s, void, void, void) -#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e) -#define KERN_ATTR_RULES() KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void) -#define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e) -#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, void) -#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, e) -#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, h, void) -#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, void) -#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, e) +#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, t, void, void) +#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, t, void, e) +#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, t, h, void) +#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, void) +#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, e) #endif // union based packing diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 3606804b4..16761cc27 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -18,15 +18,18 @@ CONSTANT_VK u32 generic_constant[8192]; // 32k #if ATTACK_KERN == 0 +#define bfs_buf g_bfs_buf #define rules_buf ((const kernel_rule_t *) generic_constant) #define words_buf_s g_words_buf_s #define words_buf_r g_words_buf_r #elif ATTACK_KERN == 1 +#define bfs_buf g_bfs_buf #define rules_buf g_rules_buf #define words_buf_s g_words_buf_s #define words_buf_r g_words_buf_r #elif ATTACK_KERN == 3 #define rules_buf g_rules_buf +#define bfs_buf ((const bf_t *) generic_constant) #define words_buf_s ((const bs_word_t *) generic_constant) #define words_buf_r ((const u32x *) generic_constant) #endif diff --git a/OpenCL/inc_simd.cl b/OpenCL/inc_simd.cl index 329270140..7bedf35cb 100644 --- a/OpenCL/inc_simd.cl +++ b/OpenCL/inc_simd.cl @@ -11,18 +11,18 @@ // attack-mode 0 -DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos) +DECLSPEC u32x ix_create_bft (CONSTANT_AS const bf_t *arr, const u32 il_pos) { #if VECT_SIZE == 1 - const u32x ix = make_u32x (bfs_buf[il_pos + 0].i); + const u32x ix = make_u32x (arr[il_pos + 0].i); #elif VECT_SIZE == 2 - const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i); + const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i); #elif VECT_SIZE == 4 - const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i); + const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i); #elif VECT_SIZE == 8 - const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i); + const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i, arr[il_pos + 4].i, arr[il_pos + 5].i, arr[il_pos + 6].i, arr[il_pos + 7].i); #elif VECT_SIZE == 16 - const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i); + const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i, arr[il_pos + 4].i, arr[il_pos + 5].i, arr[il_pos + 6].i, arr[il_pos + 7].i, arr[il_pos + 8].i, arr[il_pos + 9].i, arr[il_pos + 10].i, arr[il_pos + 11].i, arr[il_pos + 12].i, arr[il_pos + 13].i, arr[il_pos + 14].i, arr[il_pos + 15].i); #endif return ix; @@ -30,35 +30,35 @@ DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos) // attack-mode 1 -DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos) +DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos) { #if VECT_SIZE == 1 - const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len); + const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len); #elif VECT_SIZE == 2 - const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len); + const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len); #elif VECT_SIZE == 4 - const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len); + const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len); #elif VECT_SIZE == 8 - const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len); + const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len, arr[il_pos + 4].pw_len, arr[il_pos + 5].pw_len, arr[il_pos + 6].pw_len, arr[il_pos + 7].pw_len); #elif VECT_SIZE == 16 - const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len); + const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len, arr[il_pos + 4].pw_len, arr[il_pos + 5].pw_len, arr[il_pos + 6].pw_len, arr[il_pos + 7].pw_len, arr[il_pos + 8].pw_len, arr[il_pos + 9].pw_len, arr[il_pos + 10].pw_len, arr[il_pos + 11].pw_len, arr[il_pos + 12].pw_len, arr[il_pos + 13].pw_len, arr[il_pos + 14].pw_len, arr[il_pos + 15].pw_len); #endif return pw_lenx; } -DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos, const int idx) +DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos, const int idx) { #if VECT_SIZE == 1 - const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx]); + const u32x ix = make_u32x (arr[il_pos + 0].i[idx]); #elif VECT_SIZE == 2 - const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]); + const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx]); #elif VECT_SIZE == 4 - const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]); + const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx]); #elif VECT_SIZE == 8 - const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]); + const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx], arr[il_pos + 4].i[idx], arr[il_pos + 5].i[idx], arr[il_pos + 6].i[idx], arr[il_pos + 7].i[idx]); #elif VECT_SIZE == 16 - const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]); + const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx], arr[il_pos + 4].i[idx], arr[il_pos + 5].i[idx], arr[il_pos + 6].i[idx], arr[il_pos + 7].i[idx], arr[il_pos + 8].i[idx], arr[il_pos + 9].i[idx], arr[il_pos + 10].i[idx], arr[il_pos + 11].i[idx], arr[il_pos + 12].i[idx], arr[il_pos + 13].i[idx], arr[il_pos + 14].i[idx], arr[il_pos + 15].i[idx]); #endif return ix; diff --git a/OpenCL/inc_simd.h b/OpenCL/inc_simd.h index 4d0d359cf..89ba41dab 100644 --- a/OpenCL/inc_simd.h +++ b/OpenCL/inc_simd.h @@ -1133,8 +1133,8 @@ #define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] ^= val.s0; (arr)[((gid) * 16) + 1].var[(idx)] ^= val.s1; (arr)[((gid) * 16) + 2].var[(idx)] ^= val.s2; (arr)[((gid) * 16) + 3].var[(idx)] ^= val.s3; (arr)[((gid) * 16) + 4].var[(idx)] ^= val.s4; (arr)[((gid) * 16) + 5].var[(idx)] ^= val.s5; (arr)[((gid) * 16) + 6].var[(idx)] ^= val.s6; (arr)[((gid) * 16) + 7].var[(idx)] ^= val.s7; (arr)[((gid) * 16) + 8].var[(idx)] ^= val.s8; (arr)[((gid) * 16) + 9].var[(idx)] ^= val.s9; (arr)[((gid) * 16) + 10].var[(idx)] ^= val.sa; (arr)[((gid) * 16) + 11].var[(idx)] ^= val.sb; (arr)[((gid) * 16) + 12].var[(idx)] ^= val.sc; (arr)[((gid) * 16) + 13].var[(idx)] ^= val.sd; (arr)[((gid) * 16) + 14].var[(idx)] ^= val.se; (arr)[((gid) * 16) + 15].var[(idx)] ^= val.sf; #endif -DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos); -DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos); -DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos, const int idx); +DECLSPEC u32x ix_create_bft (CONSTANT_AS const bf_t *arr, const u32 il_pos); +DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos); +DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos, const int idx); #endif