1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-22 08:08:10 +00:00

Scrypt Kernels: Reduced kernel wait times by making it a true split kernel where iteration count = N value

This commit is contained in:
Jens Steube 2021-04-21 15:59:14 +02:00
parent 56f47cabe2
commit 15f35fa68c
20 changed files with 724 additions and 313 deletions

View File

@ -62,6 +62,7 @@
MAYBE_UNUSED const u32 digests_cnt, \
MAYBE_UNUSED const u32 digests_offset_host, \
MAYBE_UNUSED const u32 combs_mode, \
MAYBE_UNUSED const u32 salt_repeat, \
MAYBE_UNUSED const u64 pws_pos, \
MAYBE_UNUSED const u64 gid_max
#else
@ -100,6 +101,7 @@
MAYBE_UNUSED const u32 digests_cnt, \
MAYBE_UNUSED const u32 digests_offset_host, \
MAYBE_UNUSED const u32 combs_mode, \
MAYBE_UNUSED const u32 salt_repeat, \
MAYBE_UNUSED const u64 pws_pos, \
MAYBE_UNUSED const u64 gid_max
#endif

View File

@ -1642,6 +1642,7 @@ typedef struct salt
u32 salt_iter;
u32 salt_iter2;
u32 salt_sign[2];
u32 salt_repeats;
u32 orig_pos;

View File

@ -170,14 +170,16 @@ DECLSPEC void salsa_r (uint4 *TI)
TO[idx_r2++] = R3;
}
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < STATE_CNT4; i++)
{
TI[i] = TO[i];
}
}
DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
DECLSPEC void scrypt_smix_init (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)
@ -200,9 +202,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
case 3: V = V3; break;
}
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
@ -230,7 +229,71 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
}
for (u32 i = 0; i < SCRYPT_N; i++)
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#endif
X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
}
DECLSPEC void scrypt_smix_loop (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)
const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;
const u32 x = get_global_id (0);
const u32 xd4 = x / 4;
const u32 xm4 = x & 3;
GLOBAL_AS uint4 *V;
switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
}
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#endif
X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
for (u32 N_pos = 0; N_pos < 1024; N_pos++)
{
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);
@ -247,9 +310,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
salsa_r (X);
}
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
@ -341,6 +401,41 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
}
}
KERNEL_FQ void m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
// SCRYPT part, init V
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];
const u32 P_offset = salt_repeat * STATE_CNT4;
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);
scrypt_smix_init (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}
KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{
const u64 gid = get_global_id (0);
@ -355,28 +450,19 @@ KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]);
scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
const u32 P_offset = salt_repeat * STATE_CNT4;
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]);
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);
#if SCRYPT_P >= 1
for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4)
{
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]);
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]);
}
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}
KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))

View File

@ -184,7 +184,7 @@ DECLSPEC void salsa_r (uint4 *TI)
}
}
DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
DECLSPEC void scrypt_smix_init (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)
@ -207,9 +207,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
case 3: V = V3; break;
}
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
@ -237,7 +234,71 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
}
for (u32 i = 0; i < SCRYPT_N; i++)
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#endif
X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
}
DECLSPEC void scrypt_smix_loop (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)
const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;
const u32 x = get_global_id (0);
const u32 xd4 = x / 4;
const u32 xm4 = x & 3;
GLOBAL_AS uint4 *V;
switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
}
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#endif
X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
for (u32 N_pos = 0; N_pos < 1024; N_pos++)
{
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);
@ -254,9 +315,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
salsa_r (X);
}
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
@ -477,6 +535,41 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
}
}
KERNEL_FQ void m15700_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
// SCRYPT part, init V
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];
const u32 P_offset = salt_repeat * STATE_CNT4;
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);
scrypt_smix_init (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}
KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
{
const u64 gid = get_global_id (0);
@ -491,28 +584,19 @@ KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]);
scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
const u32 P_offset = salt_repeat * STATE_CNT4;
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]);
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);
#if SCRYPT_P >= 1
for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4)
{
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]);
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]);
}
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}
KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))

View File

@ -225,7 +225,7 @@ DECLSPEC void salsa_r (uint4 *TI)
}
}
DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
DECLSPEC void scrypt_smix_init (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)
@ -248,9 +248,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
case 3: V = V3; break;
}
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
@ -278,7 +275,71 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
}
for (u32 i = 0; i < SCRYPT_N; i++)
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#endif
X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
}
DECLSPEC void scrypt_smix_loop (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)
const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;
const u32 x = get_global_id (0);
const u32 xd4 = x / 4;
const u32 xm4 = x & 3;
GLOBAL_AS uint4 *V;
switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
}
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#endif
X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
for (u32 N_pos = 0; N_pos < 1024; N_pos++)
{
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);
@ -295,9 +356,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
salsa_r (X);
}
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
@ -429,6 +487,41 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
}
}
KERNEL_FQ void m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
// SCRYPT part, init V
GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];
const u32 P_offset = salt_repeat * STATE_CNT4;
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);
scrypt_smix_init (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}
KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{
const u64 gid = get_global_id (0);
@ -443,28 +536,19 @@ KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]);
scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
const u32 P_offset = salt_repeat * STATE_CNT4;
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]);
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);
#if SCRYPT_P >= 1
for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4)
{
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]);
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]);
}
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}
KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))

View File

@ -62,6 +62,7 @@
- OpenCL Runtime: Workaround JiT compiler deadlock on NVIDIA driver >= 465.89
- RAR3 Kernels: Improved loop code, improving performance by 23%
- Startup time: Improved the startup time by avoiding some time intensive operations for skipped devices
- Scrypt Kernels: Reduced kernel wait times by making it a true split kernel where iteration count = N value
##
## Technical

View File

@ -369,14 +369,14 @@ GeForce_GTX_TITAN 3 9900 2 A
## SCRYPT
##
DEVICE_TYPE_CPU * 8900 1 N 1
DEVICE_TYPE_GPU * 8900 1 N 1
DEVICE_TYPE_CPU * 9300 1 N 1
DEVICE_TYPE_GPU * 9300 1 N 1
DEVICE_TYPE_CPU * 15700 1 N 1
DEVICE_TYPE_GPU * 15700 1 1 1
DEVICE_TYPE_CPU * 22700 1 N 1
DEVICE_TYPE_GPU * 22700 1 N 1
DEVICE_TYPE_CPU * 8900 1 N A
DEVICE_TYPE_GPU * 8900 1 N A
DEVICE_TYPE_CPU * 9300 1 N A
DEVICE_TYPE_GPU * 9300 1 N A
DEVICE_TYPE_CPU * 15700 1 N A
DEVICE_TYPE_GPU * 15700 1 1 A
DEVICE_TYPE_CPU * 22700 1 N A
DEVICE_TYPE_GPU * 22700 1 N A
## Here's an example of how to manually tune SCRYPT algorithm kernels for your hardware.
## Manually tuning the GPU will yield increased performance. There is typically no noticeable change to CPU performance.
@ -466,12 +466,12 @@ DEVICE_TYPE_GPU * 22700 1 N
## Find the ideal -n value, then store it here along with the proper compute device name.
## Formatting guidelines are available at the top of this document.
GeForce_GTX_980 * 8900 1 28 1
GeForce_GTX_980 * 9300 1 128 1
GeForce_GTX_980 * 15700 1 1 1
GeForce_GTX_980 * 22700 1 28 1
GeForce_GTX_980 * 8900 1 28 A
GeForce_GTX_980 * 9300 1 128 A
GeForce_GTX_980 * 15700 1 1 A
GeForce_GTX_980 * 22700 1 28 A
GeForce_RTX_2080_Ti * 8900 1 N 1
GeForce_RTX_2080_Ti * 9300 1 544 1
GeForce_RTX_2080_Ti * 15700 1 4 1
GeForce_RTX_2080_Ti * 22700 1 N 1
GeForce_RTX_2080_Ti * 8900 1 N A
GeForce_RTX_2080_Ti * 9300 1 544 A
GeForce_RTX_2080_Ti * 15700 1 4 A
GeForce_RTX_2080_Ti * 22700 1 N A

View File

@ -257,12 +257,14 @@ typedef enum kern_run
{
KERN_RUN_1 = 1000,
KERN_RUN_12 = 1500,
KERN_RUN_2P = 1999,
KERN_RUN_2 = 2000,
KERN_RUN_2E = 2001,
KERN_RUN_23 = 2500,
KERN_RUN_3 = 3000,
KERN_RUN_4 = 4000,
KERN_RUN_INIT2 = 5000,
KERN_RUN_LOOP2P = 5999,
KERN_RUN_LOOP2 = 6000,
KERN_RUN_AUX1 = 7001,
KERN_RUN_AUX2 = 7002,
@ -412,30 +414,33 @@ typedef enum opts_type
OPTS_TYPE_ST_BASE64 = (1ULL << 26),
OPTS_TYPE_HASH_COPY = (1ULL << 28),
OPTS_TYPE_HASH_SPLIT = (1ULL << 29),
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 30), // a kernel which is called each time normal _loop kernel finished.
OPTS_TYPE_LOOP_PREPARE = (1ULL << 30), // a kernel which is called each time before _loop kernel started.
// like a hook12 kernel but without extra buffers.
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 31), // a kernel which is called each time normal _loop kernel finished.
// but unlike a hook kernel this kernel is called for every _loop iteration offset
OPTS_TYPE_HOOK12 = (1ULL << 31),
OPTS_TYPE_HOOK23 = (1ULL << 32),
OPTS_TYPE_INIT2 = (1ULL << 33),
OPTS_TYPE_LOOP2 = (1ULL << 34),
OPTS_TYPE_AUX1 = (1ULL << 35),
OPTS_TYPE_AUX2 = (1ULL << 36),
OPTS_TYPE_AUX3 = (1ULL << 37),
OPTS_TYPE_AUX4 = (1ULL << 38),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 39),
OPTS_TYPE_HOOK12 = (1ULL << 32),
OPTS_TYPE_HOOK23 = (1ULL << 33),
OPTS_TYPE_INIT2 = (1ULL << 34),
OPTS_TYPE_LOOP2_PREPARE = (1ULL << 35), // same as OPTS_TYPE_LOOP_PREPARE but for loop2 kernel
OPTS_TYPE_LOOP2 = (1ULL << 36),
OPTS_TYPE_AUX1 = (1ULL << 37),
OPTS_TYPE_AUX2 = (1ULL << 38),
OPTS_TYPE_AUX3 = (1ULL << 39),
OPTS_TYPE_AUX4 = (1ULL << 40),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 41),
OPTS_TYPE_BINARY_HASHFILE_OPTIONAL
= (1ULL << 40), // this allows us to not enforce the use of a binary file. requires OPTS_TYPE_BINARY_HASHFILE set to be effective.
OPTS_TYPE_PT_ADD06 = (1ULL << 41),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 42),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 43), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_TM_KERNEL = (1ULL << 44),
OPTS_TYPE_SUGGEST_KG = (1ULL << 45), // suggest keep guessing for modules the user maybe wants to use --keep-guessing
OPTS_TYPE_COPY_TMPS = (1ULL << 46), // if we want to use data from tmps buffer (for example get the PMK in WPA)
OPTS_TYPE_POTFILE_NOPASS = (1ULL << 47), // sometimes the password should not be printed to potfile
OPTS_TYPE_DYNAMIC_SHARED = (1ULL << 48), // use dynamic shared memory (note: needs special kernel changes)
OPTS_TYPE_SELF_TEST_DISABLE = (1ULL << 49), // some algos use JiT in combinations with a salt or create too much startup time
OPTS_TYPE_MP_MULTI_DISABLE = (1ULL << 50), // do not multiply the kernel-accel with the multiprocessor count per device to allow more fine-tuned workload settings
OPTS_TYPE_NATIVE_THREADS = (1ULL << 51), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
= (1ULL << 42), // this allows us to not enforce the use of a binary file. requires OPTS_TYPE_BINARY_HASHFILE set to be effective.
OPTS_TYPE_PT_ADD06 = (1ULL << 43),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 44),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 45), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_TM_KERNEL = (1ULL << 46),
OPTS_TYPE_SUGGEST_KG = (1ULL << 47), // suggest keep guessing for modules the user maybe wants to use --keep-guessing
OPTS_TYPE_COPY_TMPS = (1ULL << 48), // if we want to use data from tmps buffer (for example get the PMK in WPA)
OPTS_TYPE_POTFILE_NOPASS = (1ULL << 49), // sometimes the password should not be printed to potfile
OPTS_TYPE_DYNAMIC_SHARED = (1ULL << 50), // use dynamic shared memory (note: needs special kernel changes)
OPTS_TYPE_SELF_TEST_DISABLE = (1ULL << 51), // some algos use JiT in combinations with a salt or create too much startup time
OPTS_TYPE_MP_MULTI_DISABLE = (1ULL << 52), // do not multiply the kernel-accel with the multiprocessor count per device to allow more fine-tuned workload settings
OPTS_TYPE_NATIVE_THREADS = (1ULL << 53), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
} opts_type_t;
@ -1094,12 +1099,14 @@ typedef struct hc_device_param
u32 kernel_wgs1;
u32 kernel_wgs12;
u32 kernel_wgs2p;
u32 kernel_wgs2;
u32 kernel_wgs2e;
u32 kernel_wgs23;
u32 kernel_wgs3;
u32 kernel_wgs4;
u32 kernel_wgs_init2;
u32 kernel_wgs_loop2p;
u32 kernel_wgs_loop2;
u32 kernel_wgs_mp;
u32 kernel_wgs_mp_l;
@ -1116,12 +1123,14 @@ typedef struct hc_device_param
u32 kernel_preferred_wgs_multiple1;
u32 kernel_preferred_wgs_multiple12;
u32 kernel_preferred_wgs_multiple2p;
u32 kernel_preferred_wgs_multiple2;
u32 kernel_preferred_wgs_multiple2e;
u32 kernel_preferred_wgs_multiple23;
u32 kernel_preferred_wgs_multiple3;
u32 kernel_preferred_wgs_multiple4;
u32 kernel_preferred_wgs_multiple_init2;
u32 kernel_preferred_wgs_multiple_loop2p;
u32 kernel_preferred_wgs_multiple_loop2;
u32 kernel_preferred_wgs_multiple_mp;
u32 kernel_preferred_wgs_multiple_mp_l;
@ -1138,12 +1147,14 @@ typedef struct hc_device_param
u64 kernel_local_mem_size1;
u64 kernel_local_mem_size12;
u64 kernel_local_mem_size2p;
u64 kernel_local_mem_size2;
u64 kernel_local_mem_size2e;
u64 kernel_local_mem_size23;
u64 kernel_local_mem_size3;
u64 kernel_local_mem_size4;
u64 kernel_local_mem_size_init2;
u64 kernel_local_mem_size_loop2p;
u64 kernel_local_mem_size_loop2;
u64 kernel_local_mem_size_mp;
u64 kernel_local_mem_size_mp_l;
@ -1160,12 +1171,14 @@ typedef struct hc_device_param
u64 kernel_dynamic_local_mem_size1;
u64 kernel_dynamic_local_mem_size12;
u64 kernel_dynamic_local_mem_size2p;
u64 kernel_dynamic_local_mem_size2;
u64 kernel_dynamic_local_mem_size2e;
u64 kernel_dynamic_local_mem_size23;
u64 kernel_dynamic_local_mem_size3;
u64 kernel_dynamic_local_mem_size4;
u64 kernel_dynamic_local_mem_size_init2;
u64 kernel_dynamic_local_mem_size_loop2p;
u64 kernel_dynamic_local_mem_size_loop2;
u64 kernel_dynamic_local_mem_size_mp;
u64 kernel_dynamic_local_mem_size_mp_l;
@ -1273,11 +1286,13 @@ typedef struct hc_device_param
// workaround cpu spinning
double exec_us_prev1[EXPECTED_ITERATIONS];
double exec_us_prev2p[EXPECTED_ITERATIONS];
double exec_us_prev2[EXPECTED_ITERATIONS];
double exec_us_prev2e[EXPECTED_ITERATIONS];
double exec_us_prev3[EXPECTED_ITERATIONS];
double exec_us_prev4[EXPECTED_ITERATIONS];
double exec_us_prev_init2[EXPECTED_ITERATIONS];
double exec_us_prev_loop2p[EXPECTED_ITERATIONS];
double exec_us_prev_loop2[EXPECTED_ITERATIONS];
double exec_us_prev_aux1[EXPECTED_ITERATIONS];
double exec_us_prev_aux2[EXPECTED_ITERATIONS];
@ -1378,12 +1393,14 @@ typedef struct hc_device_param
CUfunction cuda_function1;
CUfunction cuda_function12;
CUfunction cuda_function2p;
CUfunction cuda_function2;
CUfunction cuda_function2e;
CUfunction cuda_function23;
CUfunction cuda_function3;
CUfunction cuda_function4;
CUfunction cuda_function_init2;
CUfunction cuda_function_loop2p;
CUfunction cuda_function_loop2;
CUfunction cuda_function_mp;
CUfunction cuda_function_mp_l;
@ -1462,12 +1479,14 @@ typedef struct hc_device_param
cl_kernel opencl_kernel1;
cl_kernel opencl_kernel12;
cl_kernel opencl_kernel2p;
cl_kernel opencl_kernel2;
cl_kernel opencl_kernel2e;
cl_kernel opencl_kernel23;
cl_kernel opencl_kernel3;
cl_kernel opencl_kernel4;
cl_kernel opencl_kernel_init2;
cl_kernel opencl_kernel_loop2p;
cl_kernel opencl_kernel_loop2;
cl_kernel opencl_kernel_mp;
cl_kernel opencl_kernel_mp_l;

View File

@ -2998,11 +2998,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
else
{
bool run_init = true;
bool run_loop = true;
bool run_comp = true;
if (run_init == true)
if (true)
{
if (device_param->is_cuda == true)
{
@ -3089,165 +3085,190 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
}
if (run_loop == true)
if (true)
{
u32 iter = hashes->salts_buf[salt_pos].salt_iter;
const u32 salt_repeats = hashes->salts_buf[salt_pos].salt_repeats;
u32 loop_step = device_param->kernel_loops;
for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
for (u32 salt_repeat = 0; salt_repeat <= salt_repeats; salt_repeat++)
{
u32 loop_left = iter - loop_pos;
device_param->kernel_params_buf32[34] = salt_repeat;
loop_left = MIN (loop_left, loop_step);
device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2E, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, pws_pos, pws_cnt, false, 0) == -1) return -1;
}
//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;
/**
* speed
*/
const float iter_part = (float) (loop_pos + loop_left) / iter;
const u64 perf_sum_all = (u64) (pws_cnt * iter_part);
double speed_msec = hc_timer_get (device_param->timer_speed);
const u32 speed_pos = device_param->speed_pos;
device_param->speed_cnt[speed_pos] = perf_sum_all;
device_param->speed_msec[speed_pos] = speed_msec;
if (user_options->speed_only == true)
if (true)
{
if (speed_msec > 4000)
const u32 iter = hashes->salts_buf[salt_pos].salt_iter;
const u32 loop_step = device_param->kernel_loops;
for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
{
device_param->outerloop_multi *= (double) iter / (double) (loop_pos + loop_left);
u32 loop_left = iter - loop_pos;
device_param->speed_pos = 1;
loop_left = MIN (loop_left, loop_step);
device_param->speed_only_finish = true;
device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;
return 0;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2E, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
}
//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;
/**
* speed
*/
const float iter_part = (float) (loop_pos + loop_left) / iter;
const u64 perf_sum_all = (u64) (pws_cnt * iter_part);
double speed_msec = hc_timer_get (device_param->timer_speed);
const u32 speed_pos = device_param->speed_pos;
device_param->speed_cnt[speed_pos] = perf_sum_all;
device_param->speed_msec[speed_pos] = speed_msec;
if (user_options->speed_only == true)
{
if (speed_msec > 4000)
{
device_param->outerloop_multi *= (double) iter / (double) (loop_pos + loop_left);
device_param->speed_pos = 1;
device_param->speed_only_finish = true;
return 0;
}
}
}
if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_23, pws_pos, pws_cnt, false, 0) == -1) return -1;
if (device_param->is_cuda == true)
{
if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
}
if (device_param->is_opencl == true)
{
if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}
const int hook_threads = (int) user_options->hook_threads;
hook_thread_param_t *hook_threads_param = (hook_thread_param_t *) hccalloc (hook_threads, sizeof (hook_thread_param_t));
for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;
hook_thread_param->tid = i;
hook_thread_param->tsz = hook_threads;
hook_thread_param->module_ctx = module_ctx;
hook_thread_param->status_ctx = status_ctx;
hook_thread_param->device_param = device_param;
hook_thread_param->hook_extra_param = module_ctx->hook_extra_params[i];
hook_thread_param->hook_salts_buf = hashes->hook_salts_buf;
hook_thread_param->salt_pos = salt_pos;
hook_thread_param->pws_cnt = pws_cnt;
}
hc_thread_t *c_threads = (hc_thread_t *) hccalloc (hook_threads, sizeof (hc_thread_t));
for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;
hc_thread_create (c_threads[i], hook23_thread, hook_thread_param);
}
hc_thread_wait (hook_threads, c_threads);
hcfree (c_threads);
hcfree (hook_threads_param);
if (device_param->is_cuda == true)
{
if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
}
if (device_param->is_opencl == true)
{
if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}
}
}
}
if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_23, pws_pos, pws_cnt, false, 0) == -1) return -1;
if (device_param->is_cuda == true)
{
if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
}
if (device_param->is_opencl == true)
{
if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}
const int hook_threads = (int) user_options->hook_threads;
hook_thread_param_t *hook_threads_param = (hook_thread_param_t *) hccalloc (hook_threads, sizeof (hook_thread_param_t));
for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;
hook_thread_param->tid = i;
hook_thread_param->tsz = hook_threads;
hook_thread_param->module_ctx = module_ctx;
hook_thread_param->status_ctx = status_ctx;
hook_thread_param->device_param = device_param;
hook_thread_param->hook_extra_param = module_ctx->hook_extra_params[i];
hook_thread_param->hook_salts_buf = hashes->hook_salts_buf;
hook_thread_param->salt_pos = salt_pos;
hook_thread_param->pws_cnt = pws_cnt;
}
hc_thread_t *c_threads = (hc_thread_t *) hccalloc (hook_threads, sizeof (hc_thread_t));
for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;
hc_thread_create (c_threads[i], hook23_thread, hook_thread_param);
}
hc_thread_wait (hook_threads, c_threads);
hcfree (c_threads);
hcfree (hook_threads_param);
if (device_param->is_cuda == true)
{
if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
}
if (device_param->is_opencl == true)
{
if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}
}
}
// init2 and loop2 are kind of special, we use run_loop for them, too
// note: they also do not influence the performance screen
// in case you want to use this, this cane make sense only if your input data comes out of tmps[]
if (run_loop == true)
if (hashconfig->opts_type & OPTS_TYPE_INIT2)
{
// note: they also do not influence the performance screen
// in case you want to use this, this cane make sense only if your input data comes out of tmps[]
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_INIT2, pws_pos, pws_cnt, false, 0) == -1) return -1;
}
if (hashconfig->opts_type & OPTS_TYPE_INIT2)
if (true)
{
const u32 salt_repeats = hashes->salts_buf[salt_pos].salt_repeats;
for (u32 salt_repeat = 0; salt_repeat <= salt_repeats; salt_repeat++)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_INIT2, pws_pos, pws_cnt, false, 0) == -1) return -1;
}
device_param->kernel_params_buf32[34] = salt_repeat;
if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
{
u32 iter = hashes->salts_buf[salt_pos].salt_iter2;
u32 loop_step = device_param->kernel_loops;
for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE)
{
u32 loop_left = iter - loop_pos;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2P, pws_pos, pws_cnt, false, 0) == -1) return -1;
}
loop_left = MIN (loop_left, loop_step);
if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
{
u32 iter = hashes->salts_buf[salt_pos].salt_iter2;
device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;
u32 loop_step = device_param->kernel_loops;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
{
u32 loop_left = iter - loop_pos;
//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;
loop_left = MIN (loop_left, loop_step);
device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;
}
}
}
}
if (run_comp == true)
if (true)
{
if (hashconfig->opts_type & OPTS_TYPE_DEEP_COMP_KERNEL)
{
@ -3525,6 +3546,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
kernel_threads = device_param->kernel_wgs12;
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size12;
break;
case KERN_RUN_2P:
kernel_threads = device_param->kernel_wgs2p;
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size2p;
break;
case KERN_RUN_2:
kernel_threads = device_param->kernel_wgs2;
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size2;
@ -3549,6 +3574,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
kernel_threads = device_param->kernel_wgs_init2;
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size_init2;
break;
case KERN_RUN_LOOP2P:
kernel_threads = device_param->kernel_wgs_loop2p;
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size_loop2p;
break;
case KERN_RUN_LOOP2:
kernel_threads = device_param->kernel_wgs_loop2;
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size_loop2;
@ -3590,8 +3619,8 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
kernel_threads = MIN (kernel_threads, device_param->kernel_threads);
device_param->kernel_params_buf64[34] = pws_pos;
device_param->kernel_params_buf64[35] = num;
device_param->kernel_params_buf64[35] = pws_pos;
device_param->kernel_params_buf64[36] = num;
u64 num_elements = num;
@ -3603,19 +3632,21 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
switch (kern_run)
{
case KERN_RUN_1: cuda_function = device_param->cuda_function1; break;
case KERN_RUN_12: cuda_function = device_param->cuda_function12; break;
case KERN_RUN_2: cuda_function = device_param->cuda_function2; break;
case KERN_RUN_2E: cuda_function = device_param->cuda_function2e; break;
case KERN_RUN_23: cuda_function = device_param->cuda_function23; break;
case KERN_RUN_3: cuda_function = device_param->cuda_function3; break;
case KERN_RUN_4: cuda_function = device_param->cuda_function4; break;
case KERN_RUN_INIT2: cuda_function = device_param->cuda_function_init2; break;
case KERN_RUN_LOOP2: cuda_function = device_param->cuda_function_loop2; break;
case KERN_RUN_AUX1: cuda_function = device_param->cuda_function_aux1; break;
case KERN_RUN_AUX2: cuda_function = device_param->cuda_function_aux2; break;
case KERN_RUN_AUX3: cuda_function = device_param->cuda_function_aux3; break;
case KERN_RUN_AUX4: cuda_function = device_param->cuda_function_aux4; break;
case KERN_RUN_1: cuda_function = device_param->cuda_function1; break;
case KERN_RUN_12: cuda_function = device_param->cuda_function12; break;
case KERN_RUN_2P: cuda_function = device_param->cuda_function2p; break;
case KERN_RUN_2: cuda_function = device_param->cuda_function2; break;
case KERN_RUN_2E: cuda_function = device_param->cuda_function2e; break;
case KERN_RUN_23: cuda_function = device_param->cuda_function23; break;
case KERN_RUN_3: cuda_function = device_param->cuda_function3; break;
case KERN_RUN_4: cuda_function = device_param->cuda_function4; break;
case KERN_RUN_INIT2: cuda_function = device_param->cuda_function_init2; break;
case KERN_RUN_LOOP2P: cuda_function = device_param->cuda_function_loop2p; break;
case KERN_RUN_LOOP2: cuda_function = device_param->cuda_function_loop2; break;
case KERN_RUN_AUX1: cuda_function = device_param->cuda_function_aux1; break;
case KERN_RUN_AUX2: cuda_function = device_param->cuda_function_aux2; break;
case KERN_RUN_AUX3: cuda_function = device_param->cuda_function_aux3; break;
case KERN_RUN_AUX4: cuda_function = device_param->cuda_function_aux4; break;
}
if (hc_cuFuncSetAttribute (hashcat_ctx, cuda_function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1;
@ -3700,19 +3731,21 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
switch (kern_run)
{
case KERN_RUN_1: opencl_kernel = device_param->opencl_kernel1; break;
case KERN_RUN_12: opencl_kernel = device_param->opencl_kernel12; break;
case KERN_RUN_2: opencl_kernel = device_param->opencl_kernel2; break;
case KERN_RUN_2E: opencl_kernel = device_param->opencl_kernel2e; break;
case KERN_RUN_23: opencl_kernel = device_param->opencl_kernel23; break;
case KERN_RUN_3: opencl_kernel = device_param->opencl_kernel3; break;
case KERN_RUN_4: opencl_kernel = device_param->opencl_kernel4; break;
case KERN_RUN_INIT2: opencl_kernel = device_param->opencl_kernel_init2; break;
case KERN_RUN_LOOP2: opencl_kernel = device_param->opencl_kernel_loop2; break;
case KERN_RUN_AUX1: opencl_kernel = device_param->opencl_kernel_aux1; break;
case KERN_RUN_AUX2: opencl_kernel = device_param->opencl_kernel_aux2; break;
case KERN_RUN_AUX3: opencl_kernel = device_param->opencl_kernel_aux3; break;
case KERN_RUN_AUX4: opencl_kernel = device_param->opencl_kernel_aux4; break;
case KERN_RUN_1: opencl_kernel = device_param->opencl_kernel1; break;
case KERN_RUN_12: opencl_kernel = device_param->opencl_kernel12; break;
case KERN_RUN_2P: opencl_kernel = device_param->opencl_kernel2p; break;
case KERN_RUN_2: opencl_kernel = device_param->opencl_kernel2; break;
case KERN_RUN_2E: opencl_kernel = device_param->opencl_kernel2e; break;
case KERN_RUN_23: opencl_kernel = device_param->opencl_kernel23; break;
case KERN_RUN_3: opencl_kernel = device_param->opencl_kernel3; break;
case KERN_RUN_4: opencl_kernel = device_param->opencl_kernel4; break;
case KERN_RUN_INIT2: opencl_kernel = device_param->opencl_kernel_init2; break;
case KERN_RUN_LOOP2P: opencl_kernel = device_param->opencl_kernel_loop2p; break;
case KERN_RUN_LOOP2: opencl_kernel = device_param->opencl_kernel_loop2; break;
case KERN_RUN_AUX1: opencl_kernel = device_param->opencl_kernel_aux1; break;
case KERN_RUN_AUX2: opencl_kernel = device_param->opencl_kernel_aux2; break;
case KERN_RUN_AUX3: opencl_kernel = device_param->opencl_kernel_aux3; break;
case KERN_RUN_AUX4: opencl_kernel = device_param->opencl_kernel_aux4; break;
}
}
@ -3721,12 +3754,12 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_mem), device_param->kernel_params[i]) == -1) return -1;
}
for (u32 i = 24; i <= 33; i++)
for (u32 i = 24; i <= 34; i++)
{
if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_uint), device_param->kernel_params[i]) == -1) return -1;
}
for (u32 i = 34; i <= 35; i++)
for (u32 i = 35; i <= 36; i++)
{
if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_ulong), device_param->kernel_params[i]) == -1) return -1;
}
@ -3786,17 +3819,19 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
switch (kern_run)
{
case KERN_RUN_1: if (device_param->exec_us_prev1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_2: if (device_param->exec_us_prev2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_2E: if (device_param->exec_us_prev2e[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2e[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_3: if (device_param->exec_us_prev3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_4: if (device_param->exec_us_prev4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_INIT2: if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_LOOP2: if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX1: if (device_param->exec_us_prev_aux1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX2: if (device_param->exec_us_prev_aux2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX3: if (device_param->exec_us_prev_aux3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX4: if (device_param->exec_us_prev_aux4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_1: if (device_param->exec_us_prev1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_2P: if (device_param->exec_us_prev2p[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2p[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_2: if (device_param->exec_us_prev2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_2E: if (device_param->exec_us_prev2e[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2e[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_3: if (device_param->exec_us_prev3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_4: if (device_param->exec_us_prev4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_INIT2: if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_LOOP2P: if (device_param->exec_us_prev_loop2p[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2p[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_LOOP2: if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX1: if (device_param->exec_us_prev_aux1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX2: if (device_param->exec_us_prev_aux2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX3: if (device_param->exec_us_prev_aux3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm] * device_param->spin_damp)); break;
case KERN_RUN_AUX4: if (device_param->exec_us_prev_aux4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm] * device_param->spin_damp)); break;
}
}
else
@ -3830,17 +3865,19 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
switch (kern_run)
{
case KERN_RUN_1: device_param->exec_us_prev1[iterationm] = exec_us; break;
case KERN_RUN_2: device_param->exec_us_prev2[iterationm] = exec_us; break;
case KERN_RUN_2E: device_param->exec_us_prev2e[iterationm] = exec_us; break;
case KERN_RUN_3: device_param->exec_us_prev3[iterationm] = exec_us; break;
case KERN_RUN_4: device_param->exec_us_prev4[iterationm] = exec_us; break;
case KERN_RUN_INIT2: device_param->exec_us_prev_init2[iterationm] = exec_us; break;
case KERN_RUN_LOOP2: device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
case KERN_RUN_AUX1: device_param->exec_us_prev_aux1[iterationm] = exec_us; break;
case KERN_RUN_AUX2: device_param->exec_us_prev_aux2[iterationm] = exec_us; break;
case KERN_RUN_AUX3: device_param->exec_us_prev_aux3[iterationm] = exec_us; break;
case KERN_RUN_AUX4: device_param->exec_us_prev_aux4[iterationm] = exec_us; break;
case KERN_RUN_1: device_param->exec_us_prev1[iterationm] = exec_us; break;
case KERN_RUN_2P: device_param->exec_us_prev2p[iterationm] = exec_us; break;
case KERN_RUN_2: device_param->exec_us_prev2[iterationm] = exec_us; break;
case KERN_RUN_2E: device_param->exec_us_prev2e[iterationm] = exec_us; break;
case KERN_RUN_3: device_param->exec_us_prev3[iterationm] = exec_us; break;
case KERN_RUN_4: device_param->exec_us_prev4[iterationm] = exec_us; break;
case KERN_RUN_INIT2: device_param->exec_us_prev_init2[iterationm] = exec_us; break;
case KERN_RUN_LOOP2P: device_param->exec_us_prev_loop2p[iterationm] = exec_us; break;
case KERN_RUN_LOOP2: device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
case KERN_RUN_AUX1: device_param->exec_us_prev_aux1[iterationm] = exec_us; break;
case KERN_RUN_AUX2: device_param->exec_us_prev_aux2[iterationm] = exec_us; break;
case KERN_RUN_AUX3: device_param->exec_us_prev_aux3[iterationm] = exec_us; break;
case KERN_RUN_AUX4: device_param->exec_us_prev_aux4[iterationm] = exec_us; break;
}
}
}
@ -9086,8 +9123,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_buf32[31] = 0; // digests_cnt
device_param->kernel_params_buf32[32] = 0; // digests_offset
device_param->kernel_params_buf32[33] = 0; // combs_mode
device_param->kernel_params_buf64[34] = 0; // pws_pos
device_param->kernel_params_buf64[35] = 0; // gid_max
device_param->kernel_params_buf32[34] = 0; // salt_repeat
device_param->kernel_params_buf64[35] = 0; // pws_pos
device_param->kernel_params_buf64[36] = 0; // gid_max
if (device_param->is_cuda == true)
{
@ -9155,8 +9193,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params[31] = &device_param->kernel_params_buf32[31];
device_param->kernel_params[32] = &device_param->kernel_params_buf32[32];
device_param->kernel_params[33] = &device_param->kernel_params_buf32[33];
device_param->kernel_params[34] = &device_param->kernel_params_buf64[34];
device_param->kernel_params[34] = &device_param->kernel_params_buf32[34];
device_param->kernel_params[35] = &device_param->kernel_params_buf64[35];
device_param->kernel_params[36] = &device_param->kernel_params_buf64[36];
if (user_options->slow_candidates == true)
{
@ -9554,6 +9593,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;
if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
// kernel2p
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type);
if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2p, device_param->cuda_module, kernel_name) == -1) return -1;
if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_wgs2p) == -1) return -1;
if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1;
if (get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1;
device_param->kernel_preferred_wgs_multiple2p = device_param->cuda_warp_size;
}
if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
{
// kernel2e
@ -9622,6 +9678,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size;
}
// loop2 prepare
if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type);
if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2p, device_param->cuda_module, kernel_name) == -1) return -1;
if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1;
if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1;
if (get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1;
device_param->kernel_preferred_wgs_multiple_loop2p = device_param->cuda_warp_size;
}
// loop2
if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
@ -10142,6 +10215,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
// aux1
if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type);
if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2p) == -1) return -1;
if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_wgs2p) == -1) return -1;
if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_local_mem_size2p) == -1) return -1;
if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1;
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_preferred_wgs_multiple2p) == -1) return -1;
}
if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type);
@ -10208,6 +10296,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_preferred_wgs_multiple_init2) == -1) return -1;
}
// loop2 prepare
if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type);
if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_loop2p) == -1) return -1;
if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1;
if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1;
if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1;
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_preferred_wgs_multiple_loop2p) == -1) return -1;
}
// loop2
if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
@ -11071,12 +11176,14 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->cuda_function1 = NULL;
device_param->cuda_function12 = NULL;
device_param->cuda_function2p = NULL;
device_param->cuda_function2 = NULL;
device_param->cuda_function2e = NULL;
device_param->cuda_function23 = NULL;
device_param->cuda_function3 = NULL;
device_param->cuda_function4 = NULL;
device_param->cuda_function_init2 = NULL;
device_param->cuda_function_loop2p = NULL;
device_param->cuda_function_loop2 = NULL;
device_param->cuda_function_mp = NULL;
device_param->cuda_function_mp_l = NULL;
@ -11139,12 +11246,14 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
if (device_param->opencl_kernel1) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel1);
if (device_param->opencl_kernel12) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel12);
if (device_param->opencl_kernel2p) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel2p);
if (device_param->opencl_kernel2) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel2);
if (device_param->opencl_kernel2e) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel2e);
if (device_param->opencl_kernel23) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel23);
if (device_param->opencl_kernel3) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel3);
if (device_param->opencl_kernel4) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel4);
if (device_param->opencl_kernel_init2) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_init2);
if (device_param->opencl_kernel_loop2p) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_loop2p);
if (device_param->opencl_kernel_loop2) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_loop2);
if (device_param->opencl_kernel_mp) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp);
if (device_param->opencl_kernel_mp_l) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp_l);
@ -11205,12 +11314,14 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->opencl_d_st_esalts_buf = NULL;
device_param->opencl_kernel1 = NULL;
device_param->opencl_kernel12 = NULL;
device_param->opencl_kernel2p = NULL;
device_param->opencl_kernel2 = NULL;
device_param->opencl_kernel2e = NULL;
device_param->opencl_kernel23 = NULL;
device_param->opencl_kernel3 = NULL;
device_param->opencl_kernel4 = NULL;
device_param->opencl_kernel_init2 = NULL;
device_param->opencl_kernel_loop2p = NULL;
device_param->opencl_kernel_loop2 = NULL;
device_param->opencl_kernel_mp = NULL;
device_param->opencl_kernel_mp_l = NULL;

View File

@ -579,6 +579,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

View File

@ -554,6 +554,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

View File

@ -21,6 +21,7 @@ static const char *HASH_NAME = "bcrypt $2*$, Blowfish (Unix)";
static const u64 KERN_TYPE = 3200;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_DYNAMIC_SHARED;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";

View File

@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";
@ -63,14 +64,14 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;
return kernel_loops_max;
}
@ -330,6 +331,11 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->scrypt_r = hc_strtoul ((const char *) r_pos, NULL, 10);
salt->scrypt_p = hc_strtoul ((const char *) p_pos, NULL, 10);
salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;
if (salt->scrypt_N % 1024) return (PARSER_SALT_VALUE); // we set loop count to 1024 fixed
// salt
const u8 *salt_pos = token.buf[4];
@ -341,8 +347,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
memcpy (salt->salt_buf, tmp_buf, tmp_len);
salt->salt_len = tmp_len;
salt->salt_iter = 1;
salt->salt_len = tmp_len;
// digest - base64 decode

View File

@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";
@ -52,14 +53,14 @@ static const u64 SCRYPT_P = 1;
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;
return kernel_loops_max;
}
@ -299,11 +300,14 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
memcpy (salt_buf_ptr, salt_pos, salt_len);
salt->salt_len = salt_len;
salt->salt_iter = 1;
salt->scrypt_N = 16384;
salt->scrypt_r = 1;
salt->scrypt_p = 1;
salt->scrypt_N = SCRYPT_N;
salt->scrypt_r = SCRYPT_R;
salt->scrypt_p = SCRYPT_P;
salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;
// base64 decode hash

View File

@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE
| OPTS_TYPE_ST_HEX;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
@ -60,14 +61,14 @@ static const u64 SCRYPT_P = 1;
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;
return kernel_loops_max;
}
@ -349,6 +350,11 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->scrypt_r = scrypt_r;
salt->scrypt_p = scrypt_p;
salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;
if (salt->scrypt_N % 1024) return (PARSER_SALT_VALUE); // we set loop count to 1024 fixed
// salt
const u8 *salt_pos = token.buf[4];
@ -367,8 +373,6 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
ethereum_scrypt->salt_buf[6] = salt->salt_buf[6];
ethereum_scrypt->salt_buf[7] = salt->salt_buf[7];
salt->salt_iter = 1;
// ciphertext
const u8 *ciphertext_pos = token.buf[5];

View File

@ -290,6 +290,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

View File

@ -312,6 +312,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

View File

@ -600,6 +600,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

View File

@ -601,6 +601,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

View File

@ -25,6 +25,7 @@ static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_BE
| OPTS_TYPE_PT_UTF16BE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";
@ -64,14 +65,14 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;
return kernel_loops_max;
}
@ -320,6 +321,9 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->scrypt_r = SCRYPT_R;
salt->scrypt_p = SCRYPT_P;
salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;
// version
const u8 *version_pos = token.buf[1];
@ -353,8 +357,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->salt_buf[10] = hex_to_u32 (b2_pos + 16);
salt->salt_buf[11] = hex_to_u32 (b2_pos + 24);
salt->salt_len = 48;
salt->salt_iter = 1;
salt->salt_len = 48;
// fake digest: