mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-26 01:50:10 +00:00
Cracking bcrypt: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count
This commit is contained in:
parent
42e440611a
commit
5ecbcde945
@ -299,16 +299,21 @@ __constant u32a c_sbox3[256] =
|
||||
0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
|
||||
};
|
||||
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
tmp = S0[hc_bfe_S ((L), 24, 8)]; \
|
||||
tmp += S1[hc_bfe_S ((L), 16, 8)]; \
|
||||
tmp ^= S2[hc_bfe_S ((L), 8, 8)]; \
|
||||
tmp += S3[hc_bfe_S ((L), 0, 8)]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
#define BF_ROUND(L,R,N) \
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
const u32 r0 = hc_bfe_S ((L), 24, 8); \
|
||||
const u32 r1 = hc_bfe_S ((L), 16, 8); \
|
||||
const u32 r2 = hc_bfe_S ((L), 8, 8); \
|
||||
const u32 r3 = hc_bfe_S ((L), 0, 8); \
|
||||
\
|
||||
tmp = S0[r0]; \
|
||||
tmp += S1[r1]; \
|
||||
tmp ^= S2[r2]; \
|
||||
tmp += S3[r3]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
|
||||
#define BF_ENCRYPT(L,R) \
|
||||
@ -361,7 +366,7 @@ DECLSPEC void expand_key (u32 *E, u32 *W, const int len)
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m03200_init (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03200_init (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -438,10 +443,10 @@ __kernel void m03200_init (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
* do the key setup
|
||||
*/
|
||||
|
||||
__local u32 S0_all[8][256];
|
||||
__local u32 S1_all[8][256];
|
||||
__local u32 S2_all[8][256];
|
||||
__local u32 S3_all[8][256];
|
||||
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
||||
|
||||
__local u32 *S0 = S0_all[lid];
|
||||
__local u32 *S1 = S1_all[lid];
|
||||
@ -580,7 +585,7 @@ __kernel void m03200_init (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m03200_loop (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03200_loop (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -607,10 +612,10 @@ __kernel void m03200_loop (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
P[i] = tmps[gid].P[i];
|
||||
}
|
||||
|
||||
__local u32 S0_all[8][256];
|
||||
__local u32 S1_all[8][256];
|
||||
__local u32 S2_all[8][256];
|
||||
__local u32 S3_all[8][256];
|
||||
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
||||
|
||||
__local u32 *S0 = S0_all[lid];
|
||||
__local u32 *S1 = S1_all[lid];
|
||||
@ -778,7 +783,7 @@ __kernel void m03200_loop (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void m03200_comp (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03200_comp (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -798,10 +803,10 @@ __kernel void m03200_comp (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
P[i] = tmps[gid].P[i];
|
||||
}
|
||||
|
||||
__local u32 S0_all[8][256];
|
||||
__local u32 S1_all[8][256];
|
||||
__local u32 S2_all[8][256];
|
||||
__local u32 S3_all[8][256];
|
||||
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
||||
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
||||
|
||||
__local u32 *S0 = S0_all[lid];
|
||||
__local u32 *S1 = S1_all[lid];
|
||||
|
@ -43,6 +43,7 @@
|
||||
## Improvements
|
||||
##
|
||||
|
||||
- Cracking bcrypt: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count
|
||||
- Bitcoin Wallet: Be more user friendly by allowing a larger data range for ckey and public_key
|
||||
- Building: Updated BUILD.md
|
||||
- My Wallet: Added additional plaintext pattern used in newer versions
|
||||
|
@ -72,18 +72,34 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
|
||||
return tmp_size;
|
||||
}
|
||||
|
||||
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
const u32 kernel_threads_min = 8;
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
return kernel_threads_min;
|
||||
}
|
||||
// this uses some nice feedback effect.
|
||||
// based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
|
||||
// which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
|
||||
// therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
|
||||
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
|
||||
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
|
||||
// on my development system. no clue where the 4 bytes are spent.
|
||||
// I did some research on this and it seems to be related with the datatype.
|
||||
// For example, if i used u8 instead, there's only 1 byte wasted.
|
||||
|
||||
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u32 kernel_threads_max = 8;
|
||||
u32 fixed_local_size = 0;
|
||||
|
||||
return kernel_threads_max;
|
||||
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
|
||||
{
|
||||
fixed_local_size = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
fixed_local_size = (device_param->device_local_mem_size - 4) / 4096;
|
||||
}
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
|
||||
@ -236,14 +252,14 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
|
||||
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
|
||||
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kern_type = module_kern_type;
|
||||
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
||||
module_ctx->module_opti_type = module_opti_type;
|
||||
|
Loading…
Reference in New Issue
Block a user