mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-22 05:31:11 +00:00
Store precomputed KE for -m 22100 in shared memory and lock the loops per kernel invocation to a fixed value
This commit is contained in:
parent
db5decb750
commit
311d363054
@ -25,7 +25,7 @@ typedef struct bitlocker
|
||||
u32 type;
|
||||
u32 iv[4];
|
||||
u32 data[15];
|
||||
u32 wb_ke_pc[ITERATION_BITLOCKER][64]; // only 48 needed
|
||||
u32 wb_ke_pc[ITERATION_BITLOCKER][48];
|
||||
|
||||
} bitlocker_t;
|
||||
|
||||
@ -36,7 +36,13 @@ typedef struct bitlocker_tmp
|
||||
|
||||
} bitlocker_tmp_t;
|
||||
|
||||
DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, const GLOBAL_AS u32 wb_ke_pc[64])
|
||||
#ifdef REAL_SHM
|
||||
#define SHM_TYPE2 LOCAL_AS
|
||||
#else
|
||||
#define SHM_TYPE2 GLOBAL_AS
|
||||
#endif
|
||||
|
||||
DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE2 u32 s_wb_ke_pc[48])
|
||||
{
|
||||
u32x a = digest[0];
|
||||
u32x b = digest[1];
|
||||
@ -64,24 +70,24 @@ DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const
|
||||
u32x we_t = w3[2];
|
||||
u32x wf_t = w3[3];
|
||||
|
||||
#define ROUND_EXPAND_PC(i) \
|
||||
{ \
|
||||
w0_t = wb_ke_pc[i + 0]; \
|
||||
w1_t = wb_ke_pc[i + 1]; \
|
||||
w2_t = wb_ke_pc[i + 2]; \
|
||||
w3_t = wb_ke_pc[i + 3]; \
|
||||
w4_t = wb_ke_pc[i + 4]; \
|
||||
w5_t = wb_ke_pc[i + 5]; \
|
||||
w6_t = wb_ke_pc[i + 6]; \
|
||||
w7_t = wb_ke_pc[i + 7]; \
|
||||
w8_t = wb_ke_pc[i + 8]; \
|
||||
w9_t = wb_ke_pc[i + 9]; \
|
||||
wa_t = wb_ke_pc[i + 10]; \
|
||||
wb_t = wb_ke_pc[i + 11]; \
|
||||
wc_t = wb_ke_pc[i + 12]; \
|
||||
wd_t = wb_ke_pc[i + 13]; \
|
||||
we_t = wb_ke_pc[i + 14]; \
|
||||
wf_t = wb_ke_pc[i + 15]; \
|
||||
#define ROUND_EXPAND_PC(i) \
|
||||
{ \
|
||||
w0_t = s_wb_ke_pc[i + 0]; \
|
||||
w1_t = s_wb_ke_pc[i + 1]; \
|
||||
w2_t = s_wb_ke_pc[i + 2]; \
|
||||
w3_t = s_wb_ke_pc[i + 3]; \
|
||||
w4_t = s_wb_ke_pc[i + 4]; \
|
||||
w5_t = s_wb_ke_pc[i + 5]; \
|
||||
w6_t = s_wb_ke_pc[i + 6]; \
|
||||
w7_t = s_wb_ke_pc[i + 7]; \
|
||||
w8_t = s_wb_ke_pc[i + 8]; \
|
||||
w9_t = s_wb_ke_pc[i + 9]; \
|
||||
wa_t = s_wb_ke_pc[i + 10]; \
|
||||
wb_t = s_wb_ke_pc[i + 11]; \
|
||||
wc_t = s_wb_ke_pc[i + 12]; \
|
||||
wd_t = s_wb_ke_pc[i + 13]; \
|
||||
we_t = s_wb_ke_pc[i + 14]; \
|
||||
wf_t = s_wb_ke_pc[i + 15]; \
|
||||
}
|
||||
|
||||
#define ROUND_STEP(i) \
|
||||
@ -104,12 +110,14 @@ DECLSPEC void sha256_transform_vector_pc (const u32x *w0, const u32x *w1, const
|
||||
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \
|
||||
}
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef _unroll
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < 64; i += 16)
|
||||
for (int i = 16; i < 64; i += 16)
|
||||
{
|
||||
ROUND_EXPAND_PC (i); ROUND_STEP (i);
|
||||
ROUND_EXPAND_PC (i - 16); ROUND_STEP (i);
|
||||
}
|
||||
|
||||
#undef ROUND_EXPAND_PC
|
||||
@ -188,9 +196,60 @@ KERNEL_FQ void m22100_init (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
||||
KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
||||
{
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
const u64 lsz = get_local_size (0);
|
||||
|
||||
/**
|
||||
* load 256 full w[] precomputed KE buffers into shared memory since it's all static data
|
||||
* for this to work, the loop count per kernel invocation must be fixed at 256
|
||||
*/
|
||||
|
||||
#ifdef REAL_SHM
|
||||
|
||||
LOCAL_VK u32 s_wb_ke_pc[256][48];
|
||||
|
||||
for (u32 i = lid; i < 256; i += lsz)
|
||||
{
|
||||
for (u32 j = 0; j < 48; j++) // first 16 set to register
|
||||
{
|
||||
s_wb_ke_pc[i][j] = esalt_bufs[digests_offset].wb_ke_pc[loop_pos + i][j];
|
||||
}
|
||||
}
|
||||
|
||||
SYNC_THREADS ();
|
||||
|
||||
#else
|
||||
|
||||
GLOBAL_AS u32 (*s_wb_ke_pc)[48] = &esalt_bufs[digests_offset].wb_ke_pc[loop_pos];
|
||||
|
||||
#endif
|
||||
|
||||
if ((gid * VECT_SIZE) >= gid_max) return;
|
||||
|
||||
// salt to register
|
||||
|
||||
u32x t0[4];
|
||||
u32x t1[4];
|
||||
u32x t2[4];
|
||||
u32x t3[4];
|
||||
|
||||
t0[0] = salt_bufs[salt_pos].salt_buf[0];
|
||||
t0[1] = salt_bufs[salt_pos].salt_buf[1];
|
||||
t0[2] = salt_bufs[salt_pos].salt_buf[2];
|
||||
t0[3] = salt_bufs[salt_pos].salt_buf[3];
|
||||
t1[0] = 0;
|
||||
t1[1] = 0;
|
||||
t1[2] = 0x80000000;
|
||||
t1[3] = 0;
|
||||
t2[0] = 0;
|
||||
t2[1] = 0;
|
||||
t2[2] = 0;
|
||||
t2[3] = 0;
|
||||
t3[0] = 0;
|
||||
t3[1] = 0;
|
||||
t3[2] = 0;
|
||||
t3[3] = 88 * 8;
|
||||
|
||||
// init
|
||||
|
||||
u32x w0[4];
|
||||
@ -230,8 +289,11 @@ KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t))
|
||||
digest[6] = SHA256M_G;
|
||||
digest[7] = SHA256M_H;
|
||||
|
||||
sha256_transform_vector (w0, w1, w2, w3, digest);
|
||||
sha256_transform_vector_pc (w0, w1, w2, w3, digest, esalt_bufs[digests_offset].wb_ke_pc[j]);
|
||||
sha256_transform_vector (w0, w1, w2, w3, digest);
|
||||
|
||||
t1[0] = hc_swap32_S (j); // only moving part
|
||||
|
||||
sha256_transform_vector_pc (t0, t1, t2, t3, digest, s_wb_ke_pc[i]);
|
||||
|
||||
w0[0] = digest[0];
|
||||
w0[1] = digest[1];
|
||||
|
@ -51,7 +51,7 @@ typedef struct bitlocker
|
||||
u32 type;
|
||||
u32 iv[4];
|
||||
u32 data[15];
|
||||
u32 wb_ke_pc[ITERATION_BITLOCKER][64]; // only 48 needed
|
||||
u32 wb_ke_pc[ITERATION_BITLOCKER][48];
|
||||
|
||||
} bitlocker_t;
|
||||
|
||||
@ -78,11 +78,18 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
|
||||
return tmp_size;
|
||||
}
|
||||
|
||||
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : 256;
|
||||
const u32 kernel_loops_min = 256;
|
||||
|
||||
return kernel_threads_max;
|
||||
return kernel_loops_min;
|
||||
}
|
||||
|
||||
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u32 kernel_loops_max = 256;
|
||||
|
||||
return kernel_loops_max;
|
||||
}
|
||||
|
||||
u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
@ -210,34 +217,35 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
|
||||
|
||||
for (int i = 0; i < ITERATION_BITLOCKER; i++)
|
||||
{
|
||||
bitlocker->wb_ke_pc[i][ 0] = salt->salt_buf[0];
|
||||
bitlocker->wb_ke_pc[i][ 1] = salt->salt_buf[1];
|
||||
bitlocker->wb_ke_pc[i][ 2] = salt->salt_buf[2];
|
||||
bitlocker->wb_ke_pc[i][ 3] = salt->salt_buf[3];
|
||||
bitlocker->wb_ke_pc[i][ 4] = byte_swap_32 (i);
|
||||
bitlocker->wb_ke_pc[i][ 5] = 0;
|
||||
bitlocker->wb_ke_pc[i][ 6] = 0x80000000;
|
||||
bitlocker->wb_ke_pc[i][ 7] = 0;
|
||||
bitlocker->wb_ke_pc[i][ 8] = 0;
|
||||
bitlocker->wb_ke_pc[i][ 9] = 0;
|
||||
bitlocker->wb_ke_pc[i][10] = 0;
|
||||
bitlocker->wb_ke_pc[i][11] = 0;
|
||||
bitlocker->wb_ke_pc[i][12] = 0;
|
||||
bitlocker->wb_ke_pc[i][13] = 0;
|
||||
bitlocker->wb_ke_pc[i][14] = 0;
|
||||
bitlocker->wb_ke_pc[i][15] = 88 * 8;
|
||||
u32 tmp[64];
|
||||
|
||||
tmp[ 0] = salt->salt_buf[0];
|
||||
tmp[ 1] = salt->salt_buf[1];
|
||||
tmp[ 2] = salt->salt_buf[2];
|
||||
tmp[ 3] = salt->salt_buf[3];
|
||||
tmp[ 4] = byte_swap_32 (i);
|
||||
tmp[ 5] = 0;
|
||||
tmp[ 6] = 0x80000000;
|
||||
tmp[ 7] = 0;
|
||||
tmp[ 8] = 0;
|
||||
tmp[ 9] = 0;
|
||||
tmp[10] = 0;
|
||||
tmp[11] = 0;
|
||||
tmp[12] = 0;
|
||||
tmp[13] = 0;
|
||||
tmp[14] = 0;
|
||||
tmp[15] = 88 * 8;
|
||||
|
||||
#define hc_rotl32_S rotl32
|
||||
|
||||
for (int j = 16; j < 64; j++)
|
||||
{
|
||||
bitlocker->wb_ke_pc[i][j] = SHA256_EXPAND_S
|
||||
(
|
||||
bitlocker->wb_ke_pc[i][j - 2],
|
||||
bitlocker->wb_ke_pc[i][j - 7],
|
||||
bitlocker->wb_ke_pc[i][j - 15],
|
||||
bitlocker->wb_ke_pc[i][j - 16]
|
||||
);
|
||||
tmp[j] = SHA256_EXPAND_S (tmp[j - 2], tmp[j - 7], tmp[j - 15], tmp[j - 16]);
|
||||
}
|
||||
|
||||
for (int j = 0; j < 48; j++)
|
||||
{
|
||||
bitlocker->wb_ke_pc[i][j] = tmp[16 + j];
|
||||
}
|
||||
}
|
||||
|
||||
@ -423,9 +431,9 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
|
||||
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
|
||||
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
|
||||
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
|
||||
module_ctx->module_kern_type = module_kern_type;
|
||||
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
|
||||
|
Loading…
Reference in New Issue
Block a user