mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-26 01:50:10 +00:00
PDF 1.7 Kernel: Improved performance on AMD GPU by using shared memory for the scratch buffer
Inspired by https://github.com/reger-men/hashcat/blob/6.2.4/OpenCL/m10700-optimized.cl
This commit is contained in:
parent
d85ca3a752
commit
1d33b57144
@ -211,7 +211,7 @@ DECLSPEC void orig_sha512_transform (const u64 *w0, const u64 *w1, const u64 *w2
|
|||||||
#define WORDMAXSZ4 (WORDMAXSZ / 4)
|
#define WORDMAXSZ4 (WORDMAXSZ / 4)
|
||||||
#define AESSZ4 (AESSZ / 4)
|
#define AESSZ4 (AESSZ / 4)
|
||||||
|
|
||||||
DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, const u32 bl_len)
|
DECLSPEC void make_sc (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, const u32 bl_len)
|
||||||
{
|
{
|
||||||
const u32 bd = bl_len / 4;
|
const u32 bd = bl_len / 4;
|
||||||
|
|
||||||
@ -263,7 +263,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, const u32 pwbl_len)
|
DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, LOCAL_AS const u32 *sc, const u32 pwbl_len)
|
||||||
{
|
{
|
||||||
const u32 m = offset % pwbl_len;
|
const u32 m = offset % pwbl_len;
|
||||||
|
|
||||||
@ -293,7 +293,7 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, LOCAL_AS const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||||
{
|
{
|
||||||
for (u32 k = 0, wk = 0; k < W_len; k += AESSZ, wk += AESSZ4)
|
for (u32 k = 0, wk = 0; k < W_len; k += AESSZ, wk += AESSZ4)
|
||||||
{
|
{
|
||||||
@ -315,12 +315,10 @@ DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
DECLSPEC u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||||
{
|
{
|
||||||
// make scratch buffer
|
// make scratch buffer
|
||||||
|
|
||||||
u32 sc[PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
|
|
||||||
|
|
||||||
make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len);
|
make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len);
|
||||||
|
|
||||||
// make sure pwbl_len is calculated before it gets changed
|
// make sure pwbl_len is calculated before it gets changed
|
||||||
@ -668,18 +666,20 @@ KERNEL_FQ void m10700_loop (KERN_ATTR_TMPS_ESALT (pdf17l8_tmp_t, pdf_t))
|
|||||||
ctx.dgst_len = tmps[gid].dgst_len;
|
ctx.dgst_len = tmps[gid].dgst_len;
|
||||||
ctx.W_len = tmps[gid].W_len;
|
ctx.W_len = tmps[gid].W_len;
|
||||||
|
|
||||||
|
LOCAL_VK u32 s_sc[256][PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
|
||||||
|
|
||||||
u32 ex = 0;
|
u32 ex = 0;
|
||||||
|
|
||||||
for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
|
for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
|
||||||
{
|
{
|
||||||
ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
|
ex = do_round (s_sc[lid], w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((loop_pos + loop_cnt) == 64)
|
if ((loop_pos + loop_cnt) == 64)
|
||||||
{
|
{
|
||||||
for (u32 i = 64; i < (ex & 0xff) + 32; i++)
|
for (u32 i = 64; i < (ex & 0xff) + 32; i++)
|
||||||
{
|
{
|
||||||
ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
|
ex = do_round (s_sc[lid], w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,12 @@
|
|||||||
- Added hash-mode: MultiBit Classic .wallet (scrypt)
|
- Added hash-mode: MultiBit Classic .wallet (scrypt)
|
||||||
- Added hash-mode: MurmurHash3
|
- Added hash-mode: MurmurHash3
|
||||||
|
|
||||||
|
##
|
||||||
|
## Performance
|
||||||
|
##
|
||||||
|
|
||||||
|
- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer
|
||||||
|
|
||||||
##
|
##
|
||||||
## Bugs
|
## Bugs
|
||||||
##
|
##
|
||||||
|
@ -108,6 +108,13 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
|
|||||||
return pw_max;
|
return pw_max;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports the maximum number of kernel threads for this hash-mode.
// All three arguments are unused; the result is always the fixed value 256.
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||||
|
{
|
||||||
|
// NOTE(review): the m10700_loop kernel declares its scratch array as
// s_sc[256][...] and indexes it with lid, so this cap presumably has to stay
// equal to (or below) that first dimension - confirm before changing either.
const u32 kernel_threads_max = 256;
|
||||||
|

|
||||||
return kernel_threads_max;
|
||||||
|
}
|
||||||
|
|
||||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||||
{
|
{
|
||||||
char *jit_build_options = NULL;
|
char *jit_build_options = NULL;
|
||||||
@ -360,7 +367,7 @@ void module_init (module_ctx_t *module_ctx)
|
|||||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||||
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
|
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
|
||||||
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
|
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
|
||||||
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
|
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
|
||||||
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
||||||
module_ctx->module_kern_type = module_kern_type;
|
module_ctx->module_kern_type = module_kern_type;
|
||||||
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
||||||
|
Loading…
Reference in New Issue
Block a user