Fix constant memory use of bfs_buf

pull/2022/head
Jens Steube 5 years ago
parent 2b0f657564
commit 7832c54452

@ -110,27 +110,27 @@
*/
#ifdef IS_CUDA
#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *g_words_buf_s, void, void, void)
#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, void)
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bs_word_t *g_words_buf_s, void, void, void)
#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, e)
#define KERN_ATTR_RULES() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, void)
#define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, e)
#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, t, void, void)
#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, t, void, e)
#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, t, h, void)
#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const u32x *g_words_buf_r, void, void, void)
#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const u32x *g_words_buf_r, void, void, e)
#else
#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *words_buf_s, void, void, void)
#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_RULES() KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, void)
#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, e)
#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, h, void)
#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *g_words_buf_r, void, void, void)
#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *g_words_buf_r, void, void, e)
#else
#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *words_buf_s, void, void, void)
#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_RULES() KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, void)
#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, e)
#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, h, void)
#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, void)
#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, e)
#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, t, void, void)
#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, t, void, e)
#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bf_t *bfs_buf, t, h, void)
#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, void)
#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, e)
#endif
// union based packing

@ -18,15 +18,18 @@
CONSTANT_VK u32 generic_constant[8192]; // 32k
#if ATTACK_KERN == 0
#define bfs_buf g_bfs_buf
#define rules_buf ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define bfs_buf g_bfs_buf
#define rules_buf g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf g_rules_buf
#define bfs_buf ((const bf_t *) generic_constant)
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *) generic_constant)
#endif

@ -11,18 +11,18 @@
// attack-mode 0
DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos)
DECLSPEC u32x ix_create_bft (CONSTANT_AS const bf_t *arr, const u32 il_pos)
{
#if VECT_SIZE == 1
const u32x ix = make_u32x (bfs_buf[il_pos + 0].i);
const u32x ix = make_u32x (arr[il_pos + 0].i);
#elif VECT_SIZE == 2
const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i);
const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i);
#elif VECT_SIZE == 4
const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i);
const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i);
#elif VECT_SIZE == 8
const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i);
const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i, arr[il_pos + 4].i, arr[il_pos + 5].i, arr[il_pos + 6].i, arr[il_pos + 7].i);
#elif VECT_SIZE == 16
const u32x ix = make_u32x (bfs_buf[il_pos + 0].i, bfs_buf[il_pos + 1].i, bfs_buf[il_pos + 2].i, bfs_buf[il_pos + 3].i, bfs_buf[il_pos + 4].i, bfs_buf[il_pos + 5].i, bfs_buf[il_pos + 6].i, bfs_buf[il_pos + 7].i, bfs_buf[il_pos + 8].i, bfs_buf[il_pos + 9].i, bfs_buf[il_pos + 10].i, bfs_buf[il_pos + 11].i, bfs_buf[il_pos + 12].i, bfs_buf[il_pos + 13].i, bfs_buf[il_pos + 14].i, bfs_buf[il_pos + 15].i);
const u32x ix = make_u32x (arr[il_pos + 0].i, arr[il_pos + 1].i, arr[il_pos + 2].i, arr[il_pos + 3].i, arr[il_pos + 4].i, arr[il_pos + 5].i, arr[il_pos + 6].i, arr[il_pos + 7].i, arr[il_pos + 8].i, arr[il_pos + 9].i, arr[il_pos + 10].i, arr[il_pos + 11].i, arr[il_pos + 12].i, arr[il_pos + 13].i, arr[il_pos + 14].i, arr[il_pos + 15].i);
#endif
return ix;
@ -30,35 +30,35 @@ DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos)
// attack-mode 1
DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos)
DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos)
{
#if VECT_SIZE == 1
const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len);
const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len);
#elif VECT_SIZE == 2
const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len);
const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len);
#elif VECT_SIZE == 4
const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len);
const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len);
#elif VECT_SIZE == 8
const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len);
const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len, arr[il_pos + 4].pw_len, arr[il_pos + 5].pw_len, arr[il_pos + 6].pw_len, arr[il_pos + 7].pw_len);
#elif VECT_SIZE == 16
const u32x pw_lenx = make_u32x (combs_buf[il_pos + 0].pw_len, combs_buf[il_pos + 1].pw_len, combs_buf[il_pos + 2].pw_len, combs_buf[il_pos + 3].pw_len, combs_buf[il_pos + 4].pw_len, combs_buf[il_pos + 5].pw_len, combs_buf[il_pos + 6].pw_len, combs_buf[il_pos + 7].pw_len, combs_buf[il_pos + 8].pw_len, combs_buf[il_pos + 9].pw_len, combs_buf[il_pos + 10].pw_len, combs_buf[il_pos + 11].pw_len, combs_buf[il_pos + 12].pw_len, combs_buf[il_pos + 13].pw_len, combs_buf[il_pos + 14].pw_len, combs_buf[il_pos + 15].pw_len);
const u32x pw_lenx = make_u32x (arr[il_pos + 0].pw_len, arr[il_pos + 1].pw_len, arr[il_pos + 2].pw_len, arr[il_pos + 3].pw_len, arr[il_pos + 4].pw_len, arr[il_pos + 5].pw_len, arr[il_pos + 6].pw_len, arr[il_pos + 7].pw_len, arr[il_pos + 8].pw_len, arr[il_pos + 9].pw_len, arr[il_pos + 10].pw_len, arr[il_pos + 11].pw_len, arr[il_pos + 12].pw_len, arr[il_pos + 13].pw_len, arr[il_pos + 14].pw_len, arr[il_pos + 15].pw_len);
#endif
return pw_lenx;
}
DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos, const int idx)
DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos, const int idx)
{
#if VECT_SIZE == 1
const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx]);
const u32x ix = make_u32x (arr[il_pos + 0].i[idx]);
#elif VECT_SIZE == 2
const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx]);
const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx]);
#elif VECT_SIZE == 4
const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx]);
const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx]);
#elif VECT_SIZE == 8
const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx]);
const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx], arr[il_pos + 4].i[idx], arr[il_pos + 5].i[idx], arr[il_pos + 6].i[idx], arr[il_pos + 7].i[idx]);
#elif VECT_SIZE == 16
const u32x ix = make_u32x (combs_buf[il_pos + 0].i[idx], combs_buf[il_pos + 1].i[idx], combs_buf[il_pos + 2].i[idx], combs_buf[il_pos + 3].i[idx], combs_buf[il_pos + 4].i[idx], combs_buf[il_pos + 5].i[idx], combs_buf[il_pos + 6].i[idx], combs_buf[il_pos + 7].i[idx], combs_buf[il_pos + 8].i[idx], combs_buf[il_pos + 9].i[idx], combs_buf[il_pos + 10].i[idx], combs_buf[il_pos + 11].i[idx], combs_buf[il_pos + 12].i[idx], combs_buf[il_pos + 13].i[idx], combs_buf[il_pos + 14].i[idx], combs_buf[il_pos + 15].i[idx]);
const u32x ix = make_u32x (arr[il_pos + 0].i[idx], arr[il_pos + 1].i[idx], arr[il_pos + 2].i[idx], arr[il_pos + 3].i[idx], arr[il_pos + 4].i[idx], arr[il_pos + 5].i[idx], arr[il_pos + 6].i[idx], arr[il_pos + 7].i[idx], arr[il_pos + 8].i[idx], arr[il_pos + 9].i[idx], arr[il_pos + 10].i[idx], arr[il_pos + 11].i[idx], arr[il_pos + 12].i[idx], arr[il_pos + 13].i[idx], arr[il_pos + 14].i[idx], arr[il_pos + 15].i[idx]);
#endif
return ix;

@ -1133,8 +1133,8 @@
#define unpackv_xor(arr,var,gid,idx,val) (arr)[((gid) * 16) + 0].var[(idx)] ^= val.s0; (arr)[((gid) * 16) + 1].var[(idx)] ^= val.s1; (arr)[((gid) * 16) + 2].var[(idx)] ^= val.s2; (arr)[((gid) * 16) + 3].var[(idx)] ^= val.s3; (arr)[((gid) * 16) + 4].var[(idx)] ^= val.s4; (arr)[((gid) * 16) + 5].var[(idx)] ^= val.s5; (arr)[((gid) * 16) + 6].var[(idx)] ^= val.s6; (arr)[((gid) * 16) + 7].var[(idx)] ^= val.s7; (arr)[((gid) * 16) + 8].var[(idx)] ^= val.s8; (arr)[((gid) * 16) + 9].var[(idx)] ^= val.s9; (arr)[((gid) * 16) + 10].var[(idx)] ^= val.sa; (arr)[((gid) * 16) + 11].var[(idx)] ^= val.sb; (arr)[((gid) * 16) + 12].var[(idx)] ^= val.sc; (arr)[((gid) * 16) + 13].var[(idx)] ^= val.sd; (arr)[((gid) * 16) + 14].var[(idx)] ^= val.se; (arr)[((gid) * 16) + 15].var[(idx)] ^= val.sf;
#endif
DECLSPEC u32x ix_create_bft (GLOBAL_AS const bf_t *bfs_buf, const u32 il_pos);
DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos);
DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *combs_buf, const u32 il_pos, const int idx);
DECLSPEC u32x ix_create_bft (CONSTANT_AS const bf_t *arr, const u32 il_pos);
DECLSPEC u32x pwlenx_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos);
DECLSPEC u32x ix_create_combt (GLOBAL_AS const pw_t *arr, const u32 il_pos, const int idx);
#endif

Loading…
Cancel
Save