Use real constant memory on CUDA

pull/2022/head
Jens Steube 5 years ago
parent 5d14a59304
commit 46f737c5af

@ -26,6 +26,44 @@
* - P19: Type of the esalt_bufs structure with additional data, or void.
*/
#ifdef IS_CUDA
#define KERN_ATTR(p2,p4,p5,p6,p19) \
MAYBE_UNUSED GLOBAL_AS pw_t *pws, \
MAYBE_UNUSED p2 const kernel_rule_t *g_rules_buf, \
MAYBE_UNUSED GLOBAL_AS const pw_t *combs_buf, \
MAYBE_UNUSED p4, \
MAYBE_UNUSED GLOBAL_AS p5 *tmps, \
MAYBE_UNUSED GLOBAL_AS p6 *hooks, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_a, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_b, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_c, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_d, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_a, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_b, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_c, \
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_d, \
MAYBE_UNUSED GLOBAL_AS plain_t *plains_buf, \
MAYBE_UNUSED GLOBAL_AS const digest_t *digests_buf, \
MAYBE_UNUSED GLOBAL_AS u32 *hashes_shown, \
MAYBE_UNUSED GLOBAL_AS const salt_t *salt_bufs, \
MAYBE_UNUSED GLOBAL_AS const p19 *esalt_bufs, \
MAYBE_UNUSED GLOBAL_AS u32 *d_return_buf, \
MAYBE_UNUSED GLOBAL_AS void *d_extra0_buf, \
MAYBE_UNUSED GLOBAL_AS void *d_extra1_buf, \
MAYBE_UNUSED GLOBAL_AS void *d_extra2_buf, \
MAYBE_UNUSED GLOBAL_AS void *d_extra3_buf, \
MAYBE_UNUSED const u32 bitmap_mask, \
MAYBE_UNUSED const u32 bitmap_shift1, \
MAYBE_UNUSED const u32 bitmap_shift2, \
MAYBE_UNUSED const u32 salt_pos, \
MAYBE_UNUSED const u32 loop_pos, \
MAYBE_UNUSED const u32 loop_cnt, \
MAYBE_UNUSED const u32 il_cnt, \
MAYBE_UNUSED const u32 digests_cnt, \
MAYBE_UNUSED const u32 digests_offset, \
MAYBE_UNUSED const u32 combs_mode, \
MAYBE_UNUSED const u64 gid_max
#else
#define KERN_ATTR(p2,p4,p5,p6,p19) \
MAYBE_UNUSED GLOBAL_AS pw_t *pws, \
MAYBE_UNUSED p2 const kernel_rule_t *rules_buf, \
@ -62,7 +100,7 @@
MAYBE_UNUSED const u32 digests_offset, \
MAYBE_UNUSED const u32 combs_mode, \
MAYBE_UNUSED const u64 gid_max
#endif
/*
* Shortcut macros for usage in the actual kernels
*
@ -71,8 +109,20 @@
* do not use rules or tmps, etc.
*/
#ifdef IS_CUDA
#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *g_words_buf_s, void, void, void)
#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_RULES() KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_TMPS(t) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, void)
#define KERN_ATTR_TMPS_ESALT(t,e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, void, e)
#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, h, void)
#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *g_words_buf_r, void, void, void)
#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *g_words_buf_r, void, void, e)
#else
#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *words_buf_r, void, void, void)
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const bs_word_t *words_buf_s, void, void, void)
#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
#define KERN_ATTR_RULES() KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, void)
#define KERN_ATTR_RULES_ESALT(e) KERN_ATTR (CONSTANT_AS, GLOBAL_AS const bf_t *bfs_buf, void, void, e)
@ -81,6 +131,7 @@
#define KERN_ATTR_TMPS_HOOKS(t,h) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *bfs_buf, t, h, void)
#define KERN_ATTR_VECTOR() KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, void)
#define KERN_ATTR_VECTOR_ESALT(e) KERN_ATTR (GLOBAL_AS, CONSTANT_AS const u32x *words_buf_r, void, void, e)
#endif
// union based packing

@ -13,6 +13,26 @@
#ifdef IS_CUDA
#if ATTACK_EXEC == 11
CONSTANT_VK u32 generic_constant[8192]; // 32k
#if ATTACK_KERN == 0
#define rules_buf ((const kernel_rule_t *) generic_constant)
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 1
#define rules_buf g_rules_buf
#define words_buf_s g_words_buf_s
#define words_buf_r g_words_buf_r
#elif ATTACK_KERN == 3
#define rules_buf g_rules_buf
#define words_buf_s ((const bs_word_t *) generic_constant)
#define words_buf_r ((const u32x *) generic_constant)
#endif
#endif
DECLSPEC u32 atomic_dec (u32 *p)
{
return atomicSub (p, 1);

@ -1490,6 +1490,34 @@ int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmod
return 0;
}
int hc_cuModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
CUDA_PTR *cuda = backend_ctx->cuda;
const CUresult CU_err = cuda->cuModuleGetGlobal (dptr, bytes, hmod, name);
if (CU_err != CUDA_SUCCESS)
{
const char *pStr = NULL;
if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
{
event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "cuModuleGetGlobal(): %d", CU_err);
}
return -1;
}
return 0;
}
int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
@ -7346,9 +7374,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
// we don't have sm_* on vendors not NV but it doesn't matter
#if defined (DEBUG)
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern);
#else
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern);
#endif
build_options_buf[build_options_len] = 0;
@ -8276,7 +8304,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
{
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules, size_rules); if (CU_rc == -1) return -1;
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
size_t dummy;
CU_rc = hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_rules_c, &dummy, device_param->cuda_module, "generic_constant"); if (CU_rc == -1) return -1;
}
else
{
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c); if (CU_rc == -1) return -1;
}
CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules); if (CU_rc == -1) return -1;
}
@ -8290,10 +8328,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
{
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs, size_bfs); if (CU_rc == -1) return -1;
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c, size_bfs); if (CU_rc == -1) return -1;
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm); if (CU_rc == -1) return -1;
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_root_css_buf, size_root_css); if (CU_rc == -1) return -1;
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_markov_css_buf, size_markov_css); if (CU_rc == -1) return -1;
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
size_t dummy;
CU_rc = hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_bfs_c, &dummy, device_param->cuda_module, "generic_constant"); if (CU_rc == -1) return -1;
CU_rc = hc_cuModuleGetGlobal (hashcat_ctx, &device_param->cuda_d_tm_c, &dummy, device_param->cuda_module, "generic_constant"); if (CU_rc == -1) return -1;
}
else
{
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_bfs_c, size_bfs); if (CU_rc == -1) return -1;
CU_rc = hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_tm_c, size_tm); if (CU_rc == -1) return -1;
}
}
}
@ -10665,11 +10714,11 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->cuda_d_pws_comp_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_comp_buf);
if (device_param->cuda_d_pws_idx) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_pws_idx);
if (device_param->cuda_d_rules) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules);
if (device_param->cuda_d_rules_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules_c);
//if (device_param->cuda_d_rules_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_rules_c);
if (device_param->cuda_d_combs) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs);
if (device_param->cuda_d_combs_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_combs_c);
if (device_param->cuda_d_bfs) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs);
if (device_param->cuda_d_bfs_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs_c);
//if (device_param->cuda_d_bfs_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bfs_c);
if (device_param->cuda_d_bitmap_s1_a) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_a);
if (device_param->cuda_d_bitmap_s1_b) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_b);
if (device_param->cuda_d_bitmap_s1_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_bitmap_s1_c);
@ -10692,7 +10741,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->cuda_d_extra3_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_extra3_buf);
if (device_param->cuda_d_root_css_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_root_css_buf);
if (device_param->cuda_d_markov_css_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_markov_css_buf);
if (device_param->cuda_d_tm_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tm_c);
//if (device_param->cuda_d_tm_c) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_tm_c);
if (device_param->cuda_d_st_digests_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_digests_buf);
if (device_param->cuda_d_st_salts_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_salts_buf);
if (device_param->cuda_d_st_esalts_buf) hc_cuMemFree (hashcat_ctx, device_param->cuda_d_st_esalts_buf);

Loading…
Cancel
Save