mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-29 19:38:18 +00:00
Store precomputed KE for -m 22100 in shared memory and lock the loops per kernel invocation to a fixed value
This commit is contained in:
parent
db5decb750
commit
311d363054
@ -25,7 +25,7 @@ typedef struct bitlocker
|
|||||||
u32 type;
|
u32 type;
|
||||||
u32 iv[4];
|
u32 iv[4];
|
||||||
u32 data[15];
|
u32 data[15];
|
||||||
u32 wb_ke_pc[ITERATION_BITLOCKER][64]; // only 48 needed
|
u32 wb_ke_pc[ITERATION_BITLOCKER][48];
|
||||||
|
|
||||||
} bitlocker_t;
|
} bitlocker_t;
|
||||||
|
|
||||||
@ -36,7 +36,13 @@ typedef struct bitlocker_tmp
|
|||||||
|
|
||||||
} bitlocker_tmp_t;
|
} bitlocker_tmp_t;
|
||||||
|
|
||||||
DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, const GLOBAL_AS u32 wb_ke_pc[64])
|
#ifdef REAL_SHM
|
||||||
|
#define SHM_TYPE2 LOCAL_AS
|
||||||
|
#else
|
||||||
|
#define SHM_TYPE2 GLOBAL_AS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE2 u32 s_wb_ke_pc[48])
|
||||||
{
|
{
|
||||||
u32x a = digest[0];
|
u32x a = digest[0];
|
||||||
u32x b = digest[1];
|
u32x b = digest[1];
|
||||||
@ -64,24 +70,24 @@ DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const
|
|||||||
u32x we_t = w3[2];
|
u32x we_t = w3[2];
|
||||||
u32x wf_t = w3[3];
|
u32x wf_t = w3[3];
|
||||||
|
|
||||||
#define ROUND_EXPAND_PC(i) \
|
#define ROUND_EXPAND_PC(i) \
|
||||||
{ \
|
{ \
|
||||||
w0_t = wb_ke_pc[i + 0]; \
|
w0_t = s_wb_ke_pc[i + 0]; \
|
||||||
w1_t = wb_ke_pc[i + 1]; \
|
w1_t = s_wb_ke_pc[i + 1]; \
|
||||||
w2_t = wb_ke_pc[i + 2]; \
|
w2_t = s_wb_ke_pc[i + 2]; \
|
||||||
w3_t = wb_ke_pc[i + 3]; \
|
w3_t = s_wb_ke_pc[i + 3]; \
|
||||||
w4_t = wb_ke_pc[i + 4]; \
|
w4_t = s_wb_ke_pc[i + 4]; \
|
||||||
w5_t = wb_ke_pc[i + 5]; \
|
w5_t = s_wb_ke_pc[i + 5]; \
|
||||||
w6_t = wb_ke_pc[i + 6]; \
|
w6_t = s_wb_ke_pc[i + 6]; \
|
||||||
w7_t = wb_ke_pc[i + 7]; \
|
w7_t = s_wb_ke_pc[i + 7]; \
|
||||||
w8_t = wb_ke_pc[i + 8]; \
|
w8_t = s_wb_ke_pc[i + 8]; \
|
||||||
w9_t = wb_ke_pc[i + 9]; \
|
w9_t = s_wb_ke_pc[i + 9]; \
|
||||||
wa_t = wb_ke_pc[i + 10]; \
|
wa_t = s_wb_ke_pc[i + 10]; \
|
||||||
wb_t = wb_ke_pc[i + 11]; \
|
wb_t = s_wb_ke_pc[i + 11]; \
|
||||||
wc_t = wb_ke_pc[i + 12]; \
|
wc_t = s_wb_ke_pc[i + 12]; \
|
||||||
wd_t = wb_ke_pc[i + 13]; \
|
wd_t = s_wb_ke_pc[i + 13]; \
|
||||||
we_t = wb_ke_pc[i + 14]; \
|
we_t = s_wb_ke_pc[i + 14]; \
|
||||||
wf_t = wb_ke_pc[i + 15]; \
|
wf_t = s_wb_ke_pc[i + 15]; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ROUND_STEP(i) \
|
#define ROUND_STEP(i) \
|
||||||
@ -104,12 +110,14 @@ DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const
|
|||||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
|
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ROUND_STEP (0);
|
||||||
|
|
||||||
#ifdef _unroll
|
#ifdef _unroll
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < 64; i += 16)
|
for (int i = 16; i < 64; i += 16)
|
||||||
{
|
{
|
||||||
ROUND_EXPAND_PC (i); ROUND_STEP (i);
|
ROUND_EXPAND_PC (i - 16); ROUND_STEP (i);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef ROUND_EXPAND_PC
|
#undef ROUND_EXPAND_PC
|
||||||
@ -188,9 +196,60 @@ KERNEL_FQ void m22100_init (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
|||||||
KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
||||||
{
|
{
|
||||||
const u64 gid = get_global_id (0);
|
const u64 gid = get_global_id (0);
|
||||||
|
const u64 lid = get_local_id (0);
|
||||||
|
const u64 lsz = get_local_size (0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* load 256 full w[] precomputed KE buffers into shared memory since it's all static data
|
||||||
|
* in order for this to work we need to fix the loop count at 256
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef REAL_SHM
|
||||||
|
|
||||||
|
LOCAL_VK u32 s_wb_ke_pc[256][48];
|
||||||
|
|
||||||
|
for (u32 i = lid; i < 256; i += lsz)
|
||||||
|
{
|
||||||
|
for (u32 j = 0; j < 48; j++) // first 16 set to register
|
||||||
|
{
|
||||||
|
s_wb_ke_pc[i][j] = esalt_bufs[digests_offset].wb_ke_pc[loop_pos + i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SYNC_THREADS ();
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
GLOBAL_AS u32 (*s_wb_ke_pc)[48] = &esalt_bufs[digests_offset].wb_ke_pc[loop_pos];
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
if ((gid * VECT_SIZE) >= gid_max) return;
|
if ((gid * VECT_SIZE) >= gid_max) return;
|
||||||
|
|
||||||
|
// salt to register
|
||||||
|
|
||||||
|
u32x t0[4];
|
||||||
|
u32x t1[4];
|
||||||
|
u32x t2[4];
|
||||||
|
u32x t3[4];
|
||||||
|
|
||||||
|
t0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||||
|
t0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||||
|
t0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||||
|
t0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||||
|
t1[0] = 0;
|
||||||
|
t1[1] = 0;
|
||||||
|
t1[2] = 0x80000000;
|
||||||
|
t1[3] = 0;
|
||||||
|
t2[0] = 0;
|
||||||
|
t2[1] = 0;
|
||||||
|
t2[2] = 0;
|
||||||
|
t2[3] = 0;
|
||||||
|
t3[0] = 0;
|
||||||
|
t3[1] = 0;
|
||||||
|
t3[2] = 0;
|
||||||
|
t3[3] = 88 * 8;
|
||||||
|
|
||||||
// init
|
// init
|
||||||
|
|
||||||
u32x w0[4];
|
u32x w0[4];
|
||||||
@ -230,8 +289,11 @@ KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
|||||||
digest[6] = SHA256M_G;
|
digest[6] = SHA256M_G;
|
||||||
digest[7] = SHA256M_H;
|
digest[7] = SHA256M_H;
|
||||||
|
|
||||||
sha256_transform_vector (w0, w1, w2, w3, digest);
|
sha256_transform_vector (w0, w1, w2, w3, digest);
|
||||||
sha256_transform_vector_pc (w0, w1, w2, w3, digest, esalt_bufs[digests_offset].wb_ke_pc[j]);
|
|
||||||
|
t1[0] = hc_swap32_S (j); // only moving part
|
||||||
|
|
||||||
|
sha256_transform_vector_pc (t0, t1, t2, t3, digest, s_wb_ke_pc[i]);
|
||||||
|
|
||||||
w0[0] = digest[0];
|
w0[0] = digest[0];
|
||||||
w0[1] = digest[1];
|
w0[1] = digest[1];
|
||||||
|
@ -51,7 +51,7 @@ typedef struct bitlocker
|
|||||||
u32 type;
|
u32 type;
|
||||||
u32 iv[4];
|
u32 iv[4];
|
||||||
u32 data[15];
|
u32 data[15];
|
||||||
u32 wb_ke_pc[ITERATION_BITLOCKER][64]; // only 48 needed
|
u32 wb_ke_pc[ITERATION_BITLOCKER][48];
|
||||||
|
|
||||||
} bitlocker_t;
|
} bitlocker_t;
|
||||||
|
|
||||||
@ -78,11 +78,18 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
|
|||||||
return tmp_size;
|
return tmp_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||||
{
|
{
|
||||||
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : 256;
|
const u32 kernel_loops_min = 256;
|
||||||
|
|
||||||
return kernel_threads_max;
|
return kernel_loops_min;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||||
|
{
|
||||||
|
const u32 kernel_loops_max = 256;
|
||||||
|
|
||||||
|
return kernel_loops_max;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||||
@ -210,34 +217,35 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
|
|||||||
|
|
||||||
for (int i = 0; i < ITERATION_BITLOCKER; i++)
|
for (int i = 0; i < ITERATION_BITLOCKER; i++)
|
||||||
{
|
{
|
||||||
bitlocker->wb_ke_pc[i][ 0] = salt->salt_buf[0];
|
u32 tmp[64];
|
||||||
bitlocker->wb_ke_pc[i][ 1] = salt->salt_buf[1];
|
|
||||||
bitlocker->wb_ke_pc[i][ 2] = salt->salt_buf[2];
|
tmp[ 0] = salt->salt_buf[0];
|
||||||
bitlocker->wb_ke_pc[i][ 3] = salt->salt_buf[3];
|
tmp[ 1] = salt->salt_buf[1];
|
||||||
bitlocker->wb_ke_pc[i][ 4] = byte_swap_32 (i);
|
tmp[ 2] = salt->salt_buf[2];
|
||||||
bitlocker->wb_ke_pc[i][ 5] = 0;
|
tmp[ 3] = salt->salt_buf[3];
|
||||||
bitlocker->wb_ke_pc[i][ 6] = 0x80000000;
|
tmp[ 4] = byte_swap_32 (i);
|
||||||
bitlocker->wb_ke_pc[i][ 7] = 0;
|
tmp[ 5] = 0;
|
||||||
bitlocker->wb_ke_pc[i][ 8] = 0;
|
tmp[ 6] = 0x80000000;
|
||||||
bitlocker->wb_ke_pc[i][ 9] = 0;
|
tmp[ 7] = 0;
|
||||||
bitlocker->wb_ke_pc[i][10] = 0;
|
tmp[ 8] = 0;
|
||||||
bitlocker->wb_ke_pc[i][11] = 0;
|
tmp[ 9] = 0;
|
||||||
bitlocker->wb_ke_pc[i][12] = 0;
|
tmp[10] = 0;
|
||||||
bitlocker->wb_ke_pc[i][13] = 0;
|
tmp[11] = 0;
|
||||||
bitlocker->wb_ke_pc[i][14] = 0;
|
tmp[12] = 0;
|
||||||
bitlocker->wb_ke_pc[i][15] = 88 * 8;
|
tmp[13] = 0;
|
||||||
|
tmp[14] = 0;
|
||||||
|
tmp[15] = 88 * 8;
|
||||||
|
|
||||||
#define hc_rotl32_S rotl32
|
#define hc_rotl32_S rotl32
|
||||||
|
|
||||||
for (int j = 16; j < 64; j++)
|
for (int j = 16; j < 64; j++)
|
||||||
{
|
{
|
||||||
bitlocker->wb_ke_pc[i][j] = SHA256_EXPAND_S
|
tmp[j] = SHA256_EXPAND_S (tmp[j - 2], tmp[j - 7], tmp[j - 15], tmp[j - 16]);
|
||||||
(
|
}
|
||||||
bitlocker->wb_ke_pc[i][j - 2],
|
|
||||||
bitlocker->wb_ke_pc[i][j - 7],
|
for (int j = 0; j < 48; j++)
|
||||||
bitlocker->wb_ke_pc[i][j - 15],
|
{
|
||||||
bitlocker->wb_ke_pc[i][j - 16]
|
bitlocker->wb_ke_pc[i][j] = tmp[16 + j];
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -423,9 +431,9 @@ void module_init (module_ctx_t *module_ctx)
|
|||||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||||
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
|
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
|
||||||
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
|
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
|
||||||
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
|
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
|
||||||
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
||||||
module_ctx->module_kern_type = module_kern_type;
|
module_ctx->module_kern_type = module_kern_type;
|
||||||
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
||||||
|
Loading…
Reference in New Issue
Block a user