mirror of https://github.com/hashcat/hashcat.git (synced 2024-12-22 22:58:30 +00:00)

Scrypt Kernels: Reduced kernel wait times by making it a true split kernel where iteration count = N value

This commit is contained in:
parent 56f47cabe2
commit 15f35fa68c
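The core idea of the change: instead of one monolithic scrypt_smix () call that walks all N ROMix iterations inside a single kernel launch, the work is split into a _loop_prepare kernel that fills the time-memory trade-off table V and a _loop kernel that advances the mixing state by a bounded number of iterations per launch, so the host can schedule the N iterations like any other split loop. Below is a minimal host-side sketch of that scheduling pattern, assuming placeholder launch functions (run_prepare_kernel / run_loop_kernel are illustrative names, not hashcat API):

#include <stdint.h>

typedef uint32_t u32;

#define MIN(a,b) (((a) < (b)) ? (a) : (b))

/* Placeholders for whatever launches the _loop_prepare and _loop kernels. */
extern int run_prepare_kernel (void);
extern int run_loop_kernel (u32 loop_pos, u32 loop_left);

/* Illustrative scheduling sketch, not hashcat code: the salt's iteration
   count is set to scrypt's N and the host walks it in device-sized chunks,
   so no single kernel launch has to cover all N ROMix iterations. */
static int schedule_scrypt (const u32 scrypt_n, const u32 kernel_loops)
{
  if (run_prepare_kernel () == -1) return -1; // fill the TMTO table V once

  for (u32 loop_pos = 0; loop_pos < scrypt_n; loop_pos += kernel_loops)
  {
    const u32 loop_left = MIN (scrypt_n - loop_pos, kernel_loops);

    if (run_loop_kernel (loop_pos, loop_left) == -1) return -1;
  }

  return 0;
}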
@@ -62,6 +62,7 @@
MAYBE_UNUSED const u32 digests_cnt, \
MAYBE_UNUSED const u32 digests_offset_host, \
MAYBE_UNUSED const u32 combs_mode, \
MAYBE_UNUSED const u32 salt_repeat, \
MAYBE_UNUSED const u64 pws_pos, \
MAYBE_UNUSED const u64 gid_max
#else

@@ -100,6 +101,7 @@
MAYBE_UNUSED const u32 digests_cnt, \
MAYBE_UNUSED const u32 digests_offset_host, \
MAYBE_UNUSED const u32 combs_mode, \
MAYBE_UNUSED const u32 salt_repeat, \
MAYBE_UNUSED const u64 pws_pos, \
MAYBE_UNUSED const u64 gid_max
#endif

@@ -1642,6 +1642,7 @@ typedef struct salt
u32 salt_iter;
u32 salt_iter2;
u32 salt_sign[2];
u32 salt_repeats;

u32 orig_pos;
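The two hunks above extend the shared kernel-attribute macro: a salt_repeat scalar is threaded in between combs_mode and the 64-bit pws_pos/gid_max tail, and the salt struct gains a matching salt_repeats counter next to salt_iter/salt_iter2. Roughly, the scalar tail of every kernel now looks as sketched below (order only; the leading buffer arguments that the macro also declares are omitted, and u32/u64 stand in for hashcat's typedefs):

#include <stdint.h>

typedef uint32_t u32;
typedef uint64_t u64;

/* Sketch of the trailing scalar arguments after this change. */
void kernel_tail_example (const u32 digests_cnt,
                          const u32 digests_offset_host,
                          const u32 combs_mode,
                          const u32 salt_repeat,   /* new scalar threaded through every kernel */
                          const u64 pws_pos,
                          const u64 gid_max);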
@@ -170,14 +170,16 @@ DECLSPEC void salsa_r (uint4 *TI)
TO[idx_r2++] = R3;
}

#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < STATE_CNT4; i++)
{
TI[i] = TO[i];
}
}

DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
DECLSPEC void scrypt_smix_init (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)

@@ -200,9 +202,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
case 3: V = V3; break;
}

#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA

@@ -230,7 +229,71 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
}

for (u32 i = 0; i < SCRYPT_N; i++)
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#endif

X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
}

DECLSPEC void scrypt_smix_loop (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)

const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;

const u32 x = get_global_id (0);

const u32 xd4 = x / 4;
const u32 xm4 = x & 3;

GLOBAL_AS uint4 *V;

switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
}

for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#endif

X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}

for (u32 N_pos = 0; N_pos < 1024; N_pos++)
{
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);

@@ -247,9 +310,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
salsa_r (X);
}

#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA

@@ -341,6 +401,41 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
}
}

KERNEL_FQ void m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{
/**
* base
*/

const u64 gid = get_global_id (0);

if (gid >= gid_max) return;

// SCRYPT part, init V

GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;

uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];

const u32 P_offset = salt_repeat * STATE_CNT4;

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);

scrypt_smix_init (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}

KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{
const u64 gid = get_global_id (0);

@@ -355,28 +450,19 @@ KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]);

scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
const u32 P_offset = salt_repeat * STATE_CNT4;

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]);
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);

#if SCRYPT_P >= 1
for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4)
{
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]);
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]);
}
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}

KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
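For orientation, the loop that scrypt_smix_loop now advances in 1024-iteration slices is the second phase of scrypt's ROMix: each step picks a V row from the current state and mixes it back in. A generic, non-vectorized sketch of that phase follows (textbook scrypt, not hashcat's uint4/TMTO implementation; salsa_blockmix is a placeholder):

#include <stdint.h>
#include <stddef.h>

typedef uint32_t u32;

/* Placeholder for BlockMix_salsa operating on one 128*r-byte block. */
extern void salsa_blockmix (u32 *X, size_t block_words);

/* Second phase of ROMix, the part the split kernel executes in slices:
   j = Integerify(X) mod N; X = BlockMix(X xor V[j]); repeated N times. */
static void romix_phase2 (u32 *X, const u32 *V, const u32 N, const size_t block_words)
{
  for (u32 i = 0; i < N; i++)
  {
    const u32 j = X[block_words - 16] & (N - 1); // N is a power of two

    for (size_t w = 0; w < block_words; w++) X[w] ^= V[(size_t) j * block_words + w];

    salsa_blockmix (X, block_words);
  }
}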
@@ -184,7 +184,7 @@ DECLSPEC void salsa_r (uint4 *TI)
}
}

DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
DECLSPEC void scrypt_smix_init (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)

@@ -207,9 +207,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
case 3: V = V3; break;
}

#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA

@@ -237,7 +234,71 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
}

for (u32 i = 0; i < SCRYPT_N; i++)
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#endif

X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
}

DECLSPEC void scrypt_smix_loop (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)

const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;

const u32 x = get_global_id (0);

const u32 xd4 = x / 4;
const u32 xm4 = x & 3;

GLOBAL_AS uint4 *V;

switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
}

for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#endif

X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}

for (u32 N_pos = 0; N_pos < 1024; N_pos++)
{
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);

@@ -254,9 +315,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
salsa_r (X);
}

#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA

@@ -477,6 +535,41 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
}
}

KERNEL_FQ void m15700_loop_prepare (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
{
/**
* base
*/

const u64 gid = get_global_id (0);

if (gid >= gid_max) return;

// SCRYPT part, init V

GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;

uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];

const u32 P_offset = salt_repeat * STATE_CNT4;

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);

scrypt_smix_init (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}

KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
{
const u64 gid = get_global_id (0);

@@ -491,28 +584,19 @@ KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]);

scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
const u32 P_offset = salt_repeat * STATE_CNT4;

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]);
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);

#if SCRYPT_P >= 1
for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4)
{
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]);
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]);
}
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}

KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t))
@@ -225,7 +225,7 @@ DECLSPEC void salsa_r (uint4 *TI)
}
}

DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
DECLSPEC void scrypt_smix_init (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)

@@ -248,9 +248,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
case 3: V = V3; break;
}

#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA

@@ -278,7 +275,71 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X);
}

for (u32 i = 0; i < SCRYPT_N; i++)
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w);
T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w);
T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w);
T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w);
#endif

X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}
}

DECLSPEC void scrypt_smix_loop (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3)
{
#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z))
#define CO Coord(xd4,y,z)

const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO;
const u32 zSIZE = STATE_CNT4;

const u32 x = get_global_id (0);

const u32 xd4 = x / 4;
const u32 xm4 = x & 3;

GLOBAL_AS uint4 *V;

switch (xm4)
{
case 0: V = V0; break;
case 1: V = V1; break;
case 2: V = V2; break;
case 3: V = V3; break;
}

for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA
T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#else
T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w);
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w);
T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w);
T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w);
#endif

X[i + 0] = T[0];
X[i + 1] = T[1];
X[i + 2] = T[2];
X[i + 3] = T[3];
}

for (u32 N_pos = 0; N_pos < 1024; N_pos++)
{
const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1);

@@ -295,9 +356,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui
salsa_r (X);
}

#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < STATE_CNT4; i += 4)
{
#ifdef IS_CUDA

@@ -429,6 +487,41 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
}
}

KERNEL_FQ void m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t))
{
/**
* base
*/

const u64 gid = get_global_id (0);

if (gid >= gid_max) return;

// SCRYPT part, init V

GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf;
GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf;
GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf;
GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf;

uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];

const u32 P_offset = salt_repeat * STATE_CNT4;

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);

scrypt_smix_init (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}

KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
{
const u64 gid = get_global_id (0);

@@ -443,28 +536,19 @@ KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t))
uint4 X[STATE_CNT4];
uint4 T[STATE_CNT4];

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]);

scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);
const u32 P_offset = salt_repeat * STATE_CNT4;

#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]);
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[P_offset + z]);

#if SCRYPT_P >= 1
for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4)
{
for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]);
scrypt_smix_loop (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf);

for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]);
}
#ifdef _unroll
#pragma unroll
#endif
for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[P_offset + z] = hc_swap32_4 (X[z]);
}

KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
@@ -62,6 +62,7 @@
- OpenCL Runtime: Workaround JiT compiler deadlock on NVIDIA driver >= 465.89
- RAR3 Kernels: Improved loop code, improving performance by 23%
- Startup time: Improved the startup time by avoiding some time intensive operations for skipped devices
- Scrypt Kernels: Reduced kernel wait times by making it a true split kernel where iteration count = N value

##
## Technical
@@ -369,14 +369,14 @@ GeForce_GTX_TITAN 3 9900 2 A
## SCRYPT
##

DEVICE_TYPE_CPU * 8900 1 N 1
DEVICE_TYPE_GPU * 8900 1 N 1
DEVICE_TYPE_CPU * 9300 1 N 1
DEVICE_TYPE_GPU * 9300 1 N 1
DEVICE_TYPE_CPU * 15700 1 N 1
DEVICE_TYPE_GPU * 15700 1 1 1
DEVICE_TYPE_CPU * 22700 1 N 1
DEVICE_TYPE_GPU * 22700 1 N 1
DEVICE_TYPE_CPU * 8900 1 N A
DEVICE_TYPE_GPU * 8900 1 N A
DEVICE_TYPE_CPU * 9300 1 N A
DEVICE_TYPE_GPU * 9300 1 N A
DEVICE_TYPE_CPU * 15700 1 N A
DEVICE_TYPE_GPU * 15700 1 1 A
DEVICE_TYPE_CPU * 22700 1 N A
DEVICE_TYPE_GPU * 22700 1 N A

## Here's an example of how to manually tune SCRYPT algorithm kernels for your hardware.
## Manually tuning the GPU will yield increased performance. There is typically no noticeable change to CPU performance.

@@ -466,12 +466,12 @@ DEVICE_TYPE_GPU * 22700 1 N
## Find the ideal -n value, then store it here along with the proper compute device name.
## Formatting guidelines are availabe at the top of this document.

GeForce_GTX_980 * 8900 1 28 1
GeForce_GTX_980 * 9300 1 128 1
GeForce_GTX_980 * 15700 1 1 1
GeForce_GTX_980 * 22700 1 28 1
GeForce_GTX_980 * 8900 1 28 A
GeForce_GTX_980 * 9300 1 128 A
GeForce_GTX_980 * 15700 1 1 A
GeForce_GTX_980 * 22700 1 28 A

GeForce_RTX_2080_Ti * 8900 1 N 1
GeForce_RTX_2080_Ti * 9300 1 544 1
GeForce_RTX_2080_Ti * 15700 1 4 1
GeForce_RTX_2080_Ti * 22700 1 N 1
GeForce_RTX_2080_Ti * 8900 1 N A
GeForce_RTX_2080_Ti * 9300 1 544 A
GeForce_RTX_2080_Ti * 15700 1 4 A
GeForce_RTX_2080_Ti * 22700 1 N A
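When hand-tuning the kernel-accel (-n) values in the entries above, the limiting factor for the scrypt modes is the size of the per-candidate V buffer. A back-of-the-envelope estimate, assuming the standard scrypt memory cost of 128 * r * N bytes divided by the TMTO factor (a rough sketch, not the allocator hashcat actually uses):

#include <stdint.h>
#include <stdio.h>

/* Rough per-candidate scrypt memory estimate: V holds N/TMTO blocks of
   128 * r bytes each; total device memory then scales with kernel-accel. */
static uint64_t scrypt_v_bytes (const uint64_t N, const uint64_t r, const uint64_t tmto)
{
  return (128ULL * r * N) / tmto;
}

int main (void)
{
  // example: N=16384, r=8, no TMTO -> 16 MiB per in-flight password
  printf ("%llu bytes\n", (unsigned long long) scrypt_v_bytes (16384, 8, 1));

  return 0;
}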
@@ -257,12 +257,14 @@ typedef enum kern_run
{
KERN_RUN_1 = 1000,
KERN_RUN_12 = 1500,
KERN_RUN_2P = 1999,
KERN_RUN_2 = 2000,
KERN_RUN_2E = 2001,
KERN_RUN_23 = 2500,
KERN_RUN_3 = 3000,
KERN_RUN_4 = 4000,
KERN_RUN_INIT2 = 5000,
KERN_RUN_LOOP2P = 5999,
KERN_RUN_LOOP2 = 6000,
KERN_RUN_AUX1 = 7001,
KERN_RUN_AUX2 = 7002,

@@ -412,30 +414,33 @@ typedef enum opts_type
OPTS_TYPE_ST_BASE64 = (1ULL << 26),
OPTS_TYPE_HASH_COPY = (1ULL << 28),
OPTS_TYPE_HASH_SPLIT = (1ULL << 29),
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 30), // a kernel which is called each time normal _loop kernel finished.
OPTS_TYPE_LOOP_PREPARE = (1ULL << 30), // a kernel which is called each time before _loop kernel started.
// like a hook12 kernel but without extra buffers.
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 31), // a kernel which is called each time normal _loop kernel finished.
// but unlike a hook kernel this kernel is called for every _loop iteration offset
OPTS_TYPE_HOOK12 = (1ULL << 31),
OPTS_TYPE_HOOK23 = (1ULL << 32),
OPTS_TYPE_INIT2 = (1ULL << 33),
OPTS_TYPE_LOOP2 = (1ULL << 34),
OPTS_TYPE_AUX1 = (1ULL << 35),
OPTS_TYPE_AUX2 = (1ULL << 36),
OPTS_TYPE_AUX3 = (1ULL << 37),
OPTS_TYPE_AUX4 = (1ULL << 38),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 39),
OPTS_TYPE_HOOK12 = (1ULL << 32),
OPTS_TYPE_HOOK23 = (1ULL << 33),
OPTS_TYPE_INIT2 = (1ULL << 34),
OPTS_TYPE_LOOP2_PREPARE = (1ULL << 35), // same as OPTS_TYPE_LOOP_PREPARE but for loop2 kernel
OPTS_TYPE_LOOP2 = (1ULL << 36),
OPTS_TYPE_AUX1 = (1ULL << 37),
OPTS_TYPE_AUX2 = (1ULL << 38),
OPTS_TYPE_AUX3 = (1ULL << 39),
OPTS_TYPE_AUX4 = (1ULL << 40),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 41),
OPTS_TYPE_BINARY_HASHFILE_OPTIONAL
= (1ULL << 40), // this allows us to not enforce the use of a binary file. requires OPTS_TYPE_BINARY_HASHFILE set to be effective.
OPTS_TYPE_PT_ADD06 = (1ULL << 41),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 42),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 43), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_TM_KERNEL = (1ULL << 44),
OPTS_TYPE_SUGGEST_KG = (1ULL << 45), // suggest keep guessing for modules the user maybe wants to use --keep-guessing
OPTS_TYPE_COPY_TMPS = (1ULL << 46), // if we want to use data from tmps buffer (for example get the PMK in WPA)
OPTS_TYPE_POTFILE_NOPASS = (1ULL << 47), // sometimes the password should not be printed to potfile
OPTS_TYPE_DYNAMIC_SHARED = (1ULL << 48), // use dynamic shared memory (note: needs special kernel changes)
OPTS_TYPE_SELF_TEST_DISABLE = (1ULL << 49), // some algos use JiT in combinations with a salt or create too much startup time
OPTS_TYPE_MP_MULTI_DISABLE = (1ULL << 50), // do not multiply the kernel-accel with the multiprocessor count per device to allow more fine-tuned workload settings
OPTS_TYPE_NATIVE_THREADS = (1ULL << 51), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
= (1ULL << 42), // this allows us to not enforce the use of a binary file. requires OPTS_TYPE_BINARY_HASHFILE set to be effective.
OPTS_TYPE_PT_ADD06 = (1ULL << 43),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 44),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 45), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_TM_KERNEL = (1ULL << 46),
OPTS_TYPE_SUGGEST_KG = (1ULL << 47), // suggest keep guessing for modules the user maybe wants to use --keep-guessing
OPTS_TYPE_COPY_TMPS = (1ULL << 48), // if we want to use data from tmps buffer (for example get the PMK in WPA)
OPTS_TYPE_POTFILE_NOPASS = (1ULL << 49), // sometimes the password should not be printed to potfile
OPTS_TYPE_DYNAMIC_SHARED = (1ULL << 50), // use dynamic shared memory (note: needs special kernel changes)
OPTS_TYPE_SELF_TEST_DISABLE = (1ULL << 51), // some algos use JiT in combinations with a salt or create too much startup time
OPTS_TYPE_MP_MULTI_DISABLE = (1ULL << 52), // do not multiply the kernel-accel with the multiprocessor count per device to allow more fine-tuned workload settings
OPTS_TYPE_NATIVE_THREADS = (1ULL << 53), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)

} opts_type_t;

@@ -1094,12 +1099,14 @@ typedef struct hc_device_param

u32 kernel_wgs1;
u32 kernel_wgs12;
u32 kernel_wgs2p;
u32 kernel_wgs2;
u32 kernel_wgs2e;
u32 kernel_wgs23;
u32 kernel_wgs3;
u32 kernel_wgs4;
u32 kernel_wgs_init2;
u32 kernel_wgs_loop2p;
u32 kernel_wgs_loop2;
u32 kernel_wgs_mp;
u32 kernel_wgs_mp_l;

@@ -1116,12 +1123,14 @@ typedef struct hc_device_param

u32 kernel_preferred_wgs_multiple1;
u32 kernel_preferred_wgs_multiple12;
u32 kernel_preferred_wgs_multiple2p;
u32 kernel_preferred_wgs_multiple2;
u32 kernel_preferred_wgs_multiple2e;
u32 kernel_preferred_wgs_multiple23;
u32 kernel_preferred_wgs_multiple3;
u32 kernel_preferred_wgs_multiple4;
u32 kernel_preferred_wgs_multiple_init2;
u32 kernel_preferred_wgs_multiple_loop2p;
u32 kernel_preferred_wgs_multiple_loop2;
u32 kernel_preferred_wgs_multiple_mp;
u32 kernel_preferred_wgs_multiple_mp_l;

@@ -1138,12 +1147,14 @@ typedef struct hc_device_param

u64 kernel_local_mem_size1;
u64 kernel_local_mem_size12;
u64 kernel_local_mem_size2p;
u64 kernel_local_mem_size2;
u64 kernel_local_mem_size2e;
u64 kernel_local_mem_size23;
u64 kernel_local_mem_size3;
u64 kernel_local_mem_size4;
u64 kernel_local_mem_size_init2;
u64 kernel_local_mem_size_loop2p;
u64 kernel_local_mem_size_loop2;
u64 kernel_local_mem_size_mp;
u64 kernel_local_mem_size_mp_l;

@@ -1160,12 +1171,14 @@ typedef struct hc_device_param

u64 kernel_dynamic_local_mem_size1;
u64 kernel_dynamic_local_mem_size12;
u64 kernel_dynamic_local_mem_size2p;
u64 kernel_dynamic_local_mem_size2;
u64 kernel_dynamic_local_mem_size2e;
u64 kernel_dynamic_local_mem_size23;
u64 kernel_dynamic_local_mem_size3;
u64 kernel_dynamic_local_mem_size4;
u64 kernel_dynamic_local_mem_size_init2;
u64 kernel_dynamic_local_mem_size_loop2p;
u64 kernel_dynamic_local_mem_size_loop2;
u64 kernel_dynamic_local_mem_size_mp;
u64 kernel_dynamic_local_mem_size_mp_l;

@@ -1273,11 +1286,13 @@ typedef struct hc_device_param
// workaround cpu spinning

double exec_us_prev1[EXPECTED_ITERATIONS];
double exec_us_prev2p[EXPECTED_ITERATIONS];
double exec_us_prev2[EXPECTED_ITERATIONS];
double exec_us_prev2e[EXPECTED_ITERATIONS];
double exec_us_prev3[EXPECTED_ITERATIONS];
double exec_us_prev4[EXPECTED_ITERATIONS];
double exec_us_prev_init2[EXPECTED_ITERATIONS];
double exec_us_prev_loop2p[EXPECTED_ITERATIONS];
double exec_us_prev_loop2[EXPECTED_ITERATIONS];
double exec_us_prev_aux1[EXPECTED_ITERATIONS];
double exec_us_prev_aux2[EXPECTED_ITERATIONS];

@@ -1378,12 +1393,14 @@ typedef struct hc_device_param

CUfunction cuda_function1;
CUfunction cuda_function12;
CUfunction cuda_function2p;
CUfunction cuda_function2;
CUfunction cuda_function2e;
CUfunction cuda_function23;
CUfunction cuda_function3;
CUfunction cuda_function4;
CUfunction cuda_function_init2;
CUfunction cuda_function_loop2p;
CUfunction cuda_function_loop2;
CUfunction cuda_function_mp;
CUfunction cuda_function_mp_l;

@@ -1462,12 +1479,14 @@ typedef struct hc_device_param

cl_kernel opencl_kernel1;
cl_kernel opencl_kernel12;
cl_kernel opencl_kernel2p;
cl_kernel opencl_kernel2;
cl_kernel opencl_kernel2e;
cl_kernel opencl_kernel23;
cl_kernel opencl_kernel3;
cl_kernel opencl_kernel4;
cl_kernel opencl_kernel_init2;
cl_kernel opencl_kernel_loop2p;
cl_kernel opencl_kernel_loop2;
cl_kernel opencl_kernel_mp;
cl_kernel opencl_kernel_mp_l;
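Because OPTS_TYPE_LOOP_PREPARE and OPTS_TYPE_LOOP2_PREPARE are inserted into the middle of the enum, every later flag shifts up by two bit positions; the values remain plain 64-bit masks, so modules and the backend combine and test them the usual way. A trivial standalone usage sketch, with the two new bits redefined locally just for illustration:

#include <stdint.h>
#include <stdbool.h>

/* Local stand-ins for the two new bits shown in the hunk above. */
#define OPTS_TYPE_LOOP_PREPARE  (1ULL << 30)
#define OPTS_TYPE_LOOP2_PREPARE (1ULL << 35)

/* A module opts in by OR-ing the flag into its opts_type mask ... */
static uint64_t module_opts_example (const uint64_t base_opts)
{
  return base_opts | OPTS_TYPE_LOOP_PREPARE;
}

/* ... and the backend gates the extra _loop_prepare launch on that bit. */
static bool wants_loop_prepare (const uint64_t opts_type)
{
  return (opts_type & OPTS_TYPE_LOOP_PREPARE) != 0;
}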
487 src/backend.c
@@ -2998,11 +2998,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
else
{
bool run_init = true;
bool run_loop = true;
bool run_comp = true;

if (run_init == true)
if (true)
{
if (device_param->is_cuda == true)
{

@@ -3089,165 +3085,190 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
}

if (run_loop == true)
if (true)
{
u32 iter = hashes->salts_buf[salt_pos].salt_iter;
const u32 salt_repeats = hashes->salts_buf[salt_pos].salt_repeats;

u32 loop_step = device_param->kernel_loops;

for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
for (u32 salt_repeat = 0; salt_repeat <= salt_repeats; salt_repeat++)
{
u32 loop_left = iter - loop_pos;
device_param->kernel_params_buf32[34] = salt_repeat;

loop_left = MIN (loop_left, loop_step);

device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;

if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;

if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2E, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, pws_pos, pws_cnt, false, 0) == -1) return -1;
}

//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;

/**
* speed
*/

const float iter_part = (float) (loop_pos + loop_left) / iter;

const u64 perf_sum_all = (u64) (pws_cnt * iter_part);

double speed_msec = hc_timer_get (device_param->timer_speed);

const u32 speed_pos = device_param->speed_pos;

device_param->speed_cnt[speed_pos] = perf_sum_all;

device_param->speed_msec[speed_pos] = speed_msec;

if (user_options->speed_only == true)
if (true)
{
if (speed_msec > 4000)
const u32 iter = hashes->salts_buf[salt_pos].salt_iter;

const u32 loop_step = device_param->kernel_loops;

for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
{
device_param->outerloop_multi *= (double) iter / (double) (loop_pos + loop_left);
u32 loop_left = iter - loop_pos;

device_param->speed_pos = 1;
loop_left = MIN (loop_left, loop_step);

device_param->speed_only_finish = true;
device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;

return 0;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;

if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_2E, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
}

//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;

/**
* speed
*/

const float iter_part = (float) (loop_pos + loop_left) / iter;

const u64 perf_sum_all = (u64) (pws_cnt * iter_part);

double speed_msec = hc_timer_get (device_param->timer_speed);

const u32 speed_pos = device_param->speed_pos;

device_param->speed_cnt[speed_pos] = perf_sum_all;

device_param->speed_msec[speed_pos] = speed_msec;

if (user_options->speed_only == true)
{
if (speed_msec > 4000)
{
device_param->outerloop_multi *= (double) iter / (double) (loop_pos + loop_left);

device_param->speed_pos = 1;

device_param->speed_only_finish = true;

return 0;
}
}
}

if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_23, pws_pos, pws_cnt, false, 0) == -1) return -1;

if (device_param->is_cuda == true)
{
if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
}

if (device_param->is_opencl == true)
{
if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}

const int hook_threads = (int) user_options->hook_threads;

hook_thread_param_t *hook_threads_param = (hook_thread_param_t *) hccalloc (hook_threads, sizeof (hook_thread_param_t));

for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;

hook_thread_param->tid = i;
hook_thread_param->tsz = hook_threads;

hook_thread_param->module_ctx = module_ctx;
hook_thread_param->status_ctx = status_ctx;

hook_thread_param->device_param = device_param;

hook_thread_param->hook_extra_param = module_ctx->hook_extra_params[i];
hook_thread_param->hook_salts_buf = hashes->hook_salts_buf;

hook_thread_param->salt_pos = salt_pos;

hook_thread_param->pws_cnt = pws_cnt;
}

hc_thread_t *c_threads = (hc_thread_t *) hccalloc (hook_threads, sizeof (hc_thread_t));

for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;

hc_thread_create (c_threads[i], hook23_thread, hook_thread_param);
}

hc_thread_wait (hook_threads, c_threads);

hcfree (c_threads);

hcfree (hook_threads_param);

if (device_param->is_cuda == true)
{
if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
}

if (device_param->is_opencl == true)
{
if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}
}
}
}

if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_23, pws_pos, pws_cnt, false, 0) == -1) return -1;

if (device_param->is_cuda == true)
{
if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
}

if (device_param->is_opencl == true)
{
if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}

const int hook_threads = (int) user_options->hook_threads;

hook_thread_param_t *hook_threads_param = (hook_thread_param_t *) hccalloc (hook_threads, sizeof (hook_thread_param_t));

for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;

hook_thread_param->tid = i;
hook_thread_param->tsz = hook_threads;

hook_thread_param->module_ctx = module_ctx;
hook_thread_param->status_ctx = status_ctx;

hook_thread_param->device_param = device_param;

hook_thread_param->hook_extra_param = module_ctx->hook_extra_params[i];
hook_thread_param->hook_salts_buf = hashes->hook_salts_buf;

hook_thread_param->salt_pos = salt_pos;

hook_thread_param->pws_cnt = pws_cnt;
}

hc_thread_t *c_threads = (hc_thread_t *) hccalloc (hook_threads, sizeof (hc_thread_t));

for (int i = 0; i < hook_threads; i++)
{
hook_thread_param_t *hook_thread_param = hook_threads_param + i;

hc_thread_create (c_threads[i], hook23_thread, hook_thread_param);
}

hc_thread_wait (hook_threads, c_threads);

hcfree (c_threads);

hcfree (hook_threads_param);

if (device_param->is_cuda == true)
{
if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
}

if (device_param->is_opencl == true)
{
if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
}
}
}

// init2 and loop2 are kind of special, we use run_loop for them, too
// note: they also do not influence the performance screen
// in case you want to use this, this cane make sense only if your input data comes out of tmps[]

if (run_loop == true)
if (hashconfig->opts_type & OPTS_TYPE_INIT2)
{
// note: they also do not influence the performance screen
// in case you want to use this, this cane make sense only if your input data comes out of tmps[]
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_INIT2, pws_pos, pws_cnt, false, 0) == -1) return -1;
}

if (hashconfig->opts_type & OPTS_TYPE_INIT2)
if (true)
{
const u32 salt_repeats = hashes->salts_buf[salt_pos].salt_repeats;

for (u32 salt_repeat = 0; salt_repeat <= salt_repeats; salt_repeat++)
{
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_INIT2, pws_pos, pws_cnt, false, 0) == -1) return -1;
}
device_param->kernel_params_buf32[34] = salt_repeat;

if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
{
u32 iter = hashes->salts_buf[salt_pos].salt_iter2;

u32 loop_step = device_param->kernel_loops;

for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE)
{
u32 loop_left = iter - loop_pos;
if (run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2P, pws_pos, pws_cnt, false, 0) == -1) return -1;
}

loop_left = MIN (loop_left, loop_step);
if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
{
u32 iter = hashes->salts_buf[salt_pos].salt_iter2;

device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;
u32 loop_step = device_param->kernel_loops;

if (run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;
for (u32 loop_pos = 0, slow_iteration = 0; loop_pos < iter; loop_pos += loop_step, slow_iteration++)
{
u32 loop_left = iter - loop_pos;

//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;
loop_left = MIN (loop_left, loop_step);

device_param->kernel_params_buf32[28] = loop_pos;
device_param->kernel_params_buf32[29] = loop_left;

if (run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2, pws_pos, pws_cnt, true, slow_iteration) == -1) return -1;

//bug?
//while (status_ctx->run_thread_level2 == false) break;
if (status_ctx->run_thread_level2 == false) break;
}
}
}
}

if (run_comp == true)
if (true)
{
if (hashconfig->opts_type & OPTS_TYPE_DEEP_COMP_KERNEL)
{
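Condensed, the restructured loop-kernel dispatch in the choose_kernel hunk above becomes an outer pass over salt_repeats with an optional prepare launch ahead of the chunked main loop. A stripped-down sketch of that control flow (error handling, speed accounting and the hook path omitted; launch is a placeholder for run_kernel):

#include <stdint.h>

typedef uint32_t u32;

#define MIN(a,b) (((a) < (b)) ? (a) : (b))

enum kern { RUN_2P, RUN_2 };

/* Placeholder for run_kernel (hashcat_ctx, device_param, ...). */
extern int launch (enum kern which, u32 loop_pos, u32 loop_left);

/* Stripped-down control flow of the new loop section in choose_kernel. */
static int dispatch_loop (const u32 salt_repeats, const u32 salt_iter,
                          const u32 kernel_loops, const int has_prepare)
{
  for (u32 salt_repeat = 0; salt_repeat <= salt_repeats; salt_repeat++)
  {
    // kernel_params_buf32[34] = salt_repeat is set at this point

    if (has_prepare)
    {
      if (launch (RUN_2P, 0, 0) == -1) return -1; // m*_loop_prepare
    }

    for (u32 loop_pos = 0; loop_pos < salt_iter; loop_pos += kernel_loops)
    {
      const u32 loop_left = MIN (salt_iter - loop_pos, kernel_loops);

      if (launch (RUN_2, loop_pos, loop_left) == -1) return -1; // m*_loop
    }
  }

  return 0;
}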
@ -3525,6 +3546,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
|
||||
kernel_threads = device_param->kernel_wgs12;
|
||||
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size12;
|
||||
break;
|
||||
case KERN_RUN_2P:
|
||||
kernel_threads = device_param->kernel_wgs2p;
|
||||
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size2p;
|
||||
break;
|
||||
case KERN_RUN_2:
|
||||
kernel_threads = device_param->kernel_wgs2;
|
||||
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size2;
|
||||
@ -3549,6 +3574,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
|
||||
kernel_threads = device_param->kernel_wgs_init2;
|
||||
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size_init2;
|
||||
break;
|
||||
case KERN_RUN_LOOP2P:
|
||||
kernel_threads = device_param->kernel_wgs_loop2p;
|
||||
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size_loop2p;
|
||||
break;
|
||||
case KERN_RUN_LOOP2:
|
||||
kernel_threads = device_param->kernel_wgs_loop2;
|
||||
dynamic_shared_mem = device_param->kernel_dynamic_local_mem_size_loop2;
|
||||
@ -3590,8 +3619,8 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
|
||||
|
||||
kernel_threads = MIN (kernel_threads, device_param->kernel_threads);
|
||||
|
||||
device_param->kernel_params_buf64[34] = pws_pos;
|
||||
device_param->kernel_params_buf64[35] = num;
|
||||
device_param->kernel_params_buf64[35] = pws_pos;
|
||||
device_param->kernel_params_buf64[36] = num;
|
||||
|
||||
u64 num_elements = num;
|
||||
|
||||
@ -3603,19 +3632,21 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
|
||||
{
|
||||
switch (kern_run)
|
||||
{
|
||||
case KERN_RUN_1: cuda_function = device_param->cuda_function1; break;
|
||||
case KERN_RUN_12: cuda_function = device_param->cuda_function12; break;
|
||||
case KERN_RUN_2: cuda_function = device_param->cuda_function2; break;
|
||||
case KERN_RUN_2E: cuda_function = device_param->cuda_function2e; break;
|
||||
case KERN_RUN_23: cuda_function = device_param->cuda_function23; break;
|
||||
case KERN_RUN_3: cuda_function = device_param->cuda_function3; break;
|
||||
case KERN_RUN_4: cuda_function = device_param->cuda_function4; break;
|
||||
case KERN_RUN_INIT2: cuda_function = device_param->cuda_function_init2; break;
|
||||
case KERN_RUN_LOOP2: cuda_function = device_param->cuda_function_loop2; break;
|
||||
case KERN_RUN_AUX1: cuda_function = device_param->cuda_function_aux1; break;
|
||||
case KERN_RUN_AUX2: cuda_function = device_param->cuda_function_aux2; break;
|
||||
case KERN_RUN_AUX3: cuda_function = device_param->cuda_function_aux3; break;
|
||||
case KERN_RUN_AUX4: cuda_function = device_param->cuda_function_aux4; break;
|
||||
case KERN_RUN_1: cuda_function = device_param->cuda_function1; break;
|
||||
case KERN_RUN_12: cuda_function = device_param->cuda_function12; break;
|
||||
case KERN_RUN_2P: cuda_function = device_param->cuda_function2p; break;
|
||||
case KERN_RUN_2: cuda_function = device_param->cuda_function2; break;
|
||||
case KERN_RUN_2E: cuda_function = device_param->cuda_function2e; break;
|
||||
case KERN_RUN_23: cuda_function = device_param->cuda_function23; break;
|
||||
case KERN_RUN_3: cuda_function = device_param->cuda_function3; break;
|
||||
case KERN_RUN_4: cuda_function = device_param->cuda_function4; break;
|
||||
case KERN_RUN_INIT2: cuda_function = device_param->cuda_function_init2; break;
|
||||
case KERN_RUN_LOOP2P: cuda_function = device_param->cuda_function_loop2p; break;
|
||||
case KERN_RUN_LOOP2: cuda_function = device_param->cuda_function_loop2; break;
|
||||
case KERN_RUN_AUX1: cuda_function = device_param->cuda_function_aux1; break;
|
||||
case KERN_RUN_AUX2: cuda_function = device_param->cuda_function_aux2; break;
|
||||
case KERN_RUN_AUX3: cuda_function = device_param->cuda_function_aux3; break;
|
||||
case KERN_RUN_AUX4: cuda_function = device_param->cuda_function_aux4; break;
|
||||
}
|
||||
|
||||
if (hc_cuFuncSetAttribute (hashcat_ctx, cuda_function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1;
|
||||
@ -3700,19 +3731,21 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
|
||||
{
|
||||
switch (kern_run)
|
||||
{
|
||||
case KERN_RUN_1: opencl_kernel = device_param->opencl_kernel1; break;
|
||||
case KERN_RUN_12: opencl_kernel = device_param->opencl_kernel12; break;
|
||||
case KERN_RUN_2: opencl_kernel = device_param->opencl_kernel2; break;
|
||||
case KERN_RUN_2E: opencl_kernel = device_param->opencl_kernel2e; break;
|
||||
case KERN_RUN_23: opencl_kernel = device_param->opencl_kernel23; break;
|
||||
case KERN_RUN_3: opencl_kernel = device_param->opencl_kernel3; break;
|
||||
case KERN_RUN_4: opencl_kernel = device_param->opencl_kernel4; break;
|
||||
case KERN_RUN_INIT2: opencl_kernel = device_param->opencl_kernel_init2; break;
|
||||
case KERN_RUN_LOOP2: opencl_kernel = device_param->opencl_kernel_loop2; break;
|
||||
case KERN_RUN_AUX1: opencl_kernel = device_param->opencl_kernel_aux1; break;
|
||||
case KERN_RUN_AUX2: opencl_kernel = device_param->opencl_kernel_aux2; break;
|
||||
case KERN_RUN_AUX3: opencl_kernel = device_param->opencl_kernel_aux3; break;
|
||||
case KERN_RUN_AUX4: opencl_kernel = device_param->opencl_kernel_aux4; break;
|
||||
case KERN_RUN_1: opencl_kernel = device_param->opencl_kernel1; break;
|
||||
case KERN_RUN_12: opencl_kernel = device_param->opencl_kernel12; break;
|
||||
case KERN_RUN_2P: opencl_kernel = device_param->opencl_kernel2p; break;
|
||||
case KERN_RUN_2: opencl_kernel = device_param->opencl_kernel2; break;
|
||||
case KERN_RUN_2E: opencl_kernel = device_param->opencl_kernel2e; break;
|
||||
case KERN_RUN_23: opencl_kernel = device_param->opencl_kernel23; break;
|
||||
case KERN_RUN_3: opencl_kernel = device_param->opencl_kernel3; break;
|
||||
case KERN_RUN_4: opencl_kernel = device_param->opencl_kernel4; break;
|
||||
case KERN_RUN_INIT2: opencl_kernel = device_param->opencl_kernel_init2; break;
|
||||
case KERN_RUN_LOOP2P: opencl_kernel = device_param->opencl_kernel_loop2p; break;
|
||||
case KERN_RUN_LOOP2: opencl_kernel = device_param->opencl_kernel_loop2; break;
|
||||
case KERN_RUN_AUX1: opencl_kernel = device_param->opencl_kernel_aux1; break;
|
||||
case KERN_RUN_AUX2: opencl_kernel = device_param->opencl_kernel_aux2; break;
|
||||
case KERN_RUN_AUX3: opencl_kernel = device_param->opencl_kernel_aux3; break;
|
||||
case KERN_RUN_AUX4: opencl_kernel = device_param->opencl_kernel_aux4; break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3721,12 +3754,12 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
|
||||
if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_mem), device_param->kernel_params[i]) == -1) return -1;
|
||||
}
|
||||
|
||||
for (u32 i = 24; i <= 33; i++)
|
||||
for (u32 i = 24; i <= 34; i++)
|
||||
{
|
||||
if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_uint), device_param->kernel_params[i]) == -1) return -1;
|
||||
}
|
||||
|
||||
for (u32 i = 34; i <= 35; i++)
|
||||
for (u32 i = 35; i <= 36; i++)
|
||||
{
|
||||
if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_ulong), device_param->kernel_params[i]) == -1) return -1;
|
||||
}
|
||||
@ -3786,17 +3819,19 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
|
||||
{
|
||||
switch (kern_run)
|
||||
{
|
||||
case KERN_RUN_1: if (device_param->exec_us_prev1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_2: if (device_param->exec_us_prev2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_2E: if (device_param->exec_us_prev2e[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2e[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_3: if (device_param->exec_us_prev3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_4: if (device_param->exec_us_prev4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_INIT2: if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_LOOP2: if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX1: if (device_param->exec_us_prev_aux1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX2: if (device_param->exec_us_prev_aux2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX3: if (device_param->exec_us_prev_aux3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX4: if (device_param->exec_us_prev_aux4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_1: if (device_param->exec_us_prev1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev1[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_2P: if (device_param->exec_us_prev2p[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2p[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_2: if (device_param->exec_us_prev2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_2E: if (device_param->exec_us_prev2e[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev2e[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_3: if (device_param->exec_us_prev3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev3[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_4: if (device_param->exec_us_prev4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev4[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_INIT2: if (device_param->exec_us_prev_init2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_init2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_LOOP2P: if (device_param->exec_us_prev_loop2p[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2p[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_LOOP2: if (device_param->exec_us_prev_loop2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_loop2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX1: if (device_param->exec_us_prev_aux1[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux1[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX2: if (device_param->exec_us_prev_aux2[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux2[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX3: if (device_param->exec_us_prev_aux3[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux3[iterationm] * device_param->spin_damp)); break;
|
||||
case KERN_RUN_AUX4: if (device_param->exec_us_prev_aux4[iterationm] > 0) usleep ((useconds_t) (device_param->exec_us_prev_aux4[iterationm] * device_param->spin_damp)); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -3830,17 +3865,19 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
switch (kern_run)
{
case KERN_RUN_1: device_param->exec_us_prev1[iterationm] = exec_us; break;
case KERN_RUN_2: device_param->exec_us_prev2[iterationm] = exec_us; break;
case KERN_RUN_2E: device_param->exec_us_prev2e[iterationm] = exec_us; break;
case KERN_RUN_3: device_param->exec_us_prev3[iterationm] = exec_us; break;
case KERN_RUN_4: device_param->exec_us_prev4[iterationm] = exec_us; break;
case KERN_RUN_INIT2: device_param->exec_us_prev_init2[iterationm] = exec_us; break;
case KERN_RUN_LOOP2: device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
case KERN_RUN_AUX1: device_param->exec_us_prev_aux1[iterationm] = exec_us; break;
case KERN_RUN_AUX2: device_param->exec_us_prev_aux2[iterationm] = exec_us; break;
case KERN_RUN_AUX3: device_param->exec_us_prev_aux3[iterationm] = exec_us; break;
case KERN_RUN_AUX4: device_param->exec_us_prev_aux4[iterationm] = exec_us; break;
case KERN_RUN_1: device_param->exec_us_prev1[iterationm] = exec_us; break;
case KERN_RUN_2P: device_param->exec_us_prev2p[iterationm] = exec_us; break;
case KERN_RUN_2: device_param->exec_us_prev2[iterationm] = exec_us; break;
case KERN_RUN_2E: device_param->exec_us_prev2e[iterationm] = exec_us; break;
case KERN_RUN_3: device_param->exec_us_prev3[iterationm] = exec_us; break;
case KERN_RUN_4: device_param->exec_us_prev4[iterationm] = exec_us; break;
case KERN_RUN_INIT2: device_param->exec_us_prev_init2[iterationm] = exec_us; break;
case KERN_RUN_LOOP2P: device_param->exec_us_prev_loop2p[iterationm] = exec_us; break;
case KERN_RUN_LOOP2: device_param->exec_us_prev_loop2[iterationm] = exec_us; break;
case KERN_RUN_AUX1: device_param->exec_us_prev_aux1[iterationm] = exec_us; break;
case KERN_RUN_AUX2: device_param->exec_us_prev_aux2[iterationm] = exec_us; break;
case KERN_RUN_AUX3: device_param->exec_us_prev_aux3[iterationm] = exec_us; break;
case KERN_RUN_AUX4: device_param->exec_us_prev_aux4[iterationm] = exec_us; break;
}
}
}
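The two switch blocks above form a simple feedback loop: after each launch the measured runtime is stored per kernel type and iteration bucket, and before the next synchronous wait the host sleeps for a spin_damp fraction of that stored value instead of busy-polling the whole time. A minimal standalone sketch of the idea, assuming a plain per-iteration array; only the names exec_us_prev*, spin_damp and iterationm come from the diff, prev_us, pre_wait_sleep and post_wait_record are illustrative:

#include <unistd.h>

#define ITER_BUCKETS 1024

static double prev_us[ITER_BUCKETS]; // last measured runtime per iteration bucket (like exec_us_prev*)
static double spin_damp = 0.5;       // fraction of the predicted runtime to sleep away

// Called before blocking on the event/stream: sleep away most of the expected
// runtime so the subsequent wait only has to spin for the remainder.
static void pre_wait_sleep (int iterationm)
{
  if (prev_us[iterationm] > 0) usleep ((useconds_t) (prev_us[iterationm] * spin_damp));
}

// Called after the wait completes: the measured runtime becomes the prediction
// for the next launch of the same kernel at the same iteration bucket.
static void post_wait_record (int iterationm, double exec_us)
{
  prev_us[iterationm] = exec_us;
}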
@ -9086,8 +9123,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params_buf32[31] = 0; // digests_cnt
device_param->kernel_params_buf32[32] = 0; // digests_offset
device_param->kernel_params_buf32[33] = 0; // combs_mode
device_param->kernel_params_buf64[34] = 0; // pws_pos
device_param->kernel_params_buf64[35] = 0; // gid_max
device_param->kernel_params_buf32[34] = 0; // salt_repeat
device_param->kernel_params_buf64[35] = 0; // pws_pos
device_param->kernel_params_buf64[36] = 0; // gid_max
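For orientation, the tail of the kernel argument list now ends up as sketched below; this mirrors the buf32/buf64 slots set above and the salt_repeat parameter added to the kernel declarations at the top of this commit. The struct itself is illustrative: hashcat passes these as individual kernel arguments, not as a struct.

typedef unsigned int       u32; // hashcat defines these types in its own headers
typedef unsigned long long u64;

struct kernel_tail_args_sketch
{
  // ... arguments 0..30 unchanged ...
  u32 digests_cnt;     // kernel_params_buf32[31]
  u32 digests_offset;  // kernel_params_buf32[32]
  u32 combs_mode;      // kernel_params_buf32[33]
  u32 salt_repeat;     // kernel_params_buf32[34] -- new in this commit
  u64 pws_pos;         // kernel_params_buf64[35] -- previously slot 34
  u64 gid_max;         // kernel_params_buf64[36] -- previously slot 35
};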

if (device_param->is_cuda == true)
{
@ -9155,8 +9193,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_params[31] = &device_param->kernel_params_buf32[31];
device_param->kernel_params[32] = &device_param->kernel_params_buf32[32];
device_param->kernel_params[33] = &device_param->kernel_params_buf32[33];
device_param->kernel_params[34] = &device_param->kernel_params_buf64[34];
device_param->kernel_params[34] = &device_param->kernel_params_buf32[34];
device_param->kernel_params[35] = &device_param->kernel_params_buf64[35];
device_param->kernel_params[36] = &device_param->kernel_params_buf64[36];

if (user_options->slow_candidates == true)
{
@ -9554,6 +9593,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)

device_param->kernel_preferred_wgs_multiple3 = device_param->cuda_warp_size;

if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
// kernel2p

snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type);

if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function2p, device_param->cuda_module, kernel_name) == -1) return -1;

if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_wgs2p) == -1) return -1;

if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1;

if (get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->cuda_function2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1;

device_param->kernel_preferred_wgs_multiple2p = device_param->cuda_warp_size;
}

if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
{
// kernel2e
@ -9622,6 +9678,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_preferred_wgs_multiple_init2 = device_param->cuda_warp_size;
}

// loop2 prepare

if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type);

if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_loop2p, device_param->cuda_module, kernel_name) == -1) return -1;

if (get_cuda_kernel_wgs (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1;

if (get_cuda_kernel_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1;

if (get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->cuda_function_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1;

device_param->kernel_preferred_wgs_multiple_loop2p = device_param->cuda_warp_size;
}
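With the loop_prepare/loop2_prepare entry points resolved (the OpenCL path below does the same via hc_clCreateKernel), the host can drive a split slow hash roughly as sketched here: one prepare launch, then repeated loop launches until salt_iter iterations are consumed, with the whole loop phase replayed salt_repeats + 1 times. This is a simplified stand-in under those assumptions, not a copy of hashcat's real dispatch code; run_prepare_kernel and run_loop_kernel are placeholders:

typedef unsigned int u32;

// Placeholders for the real kernel-launch helpers.
void run_prepare_kernel (u32 salt_repeat);
void run_loop_kernel    (u32 salt_repeat, u32 iter_off, u32 iter_cnt);

// Assumed driver: iterations per launch are capped by kernel_loops
// (pinned to 1024 by the scrypt modules in this commit).
void dispatch_split (u32 salt_iter, u32 salt_repeats, u32 kernel_loops)
{
  for (u32 salt_repeat = 0; salt_repeat <= salt_repeats; salt_repeat++)
  {
    run_prepare_kernel (salt_repeat); // m%05u_loop_prepare

    for (u32 iter_off = 0; iter_off < salt_iter; iter_off += kernel_loops)
    {
      run_loop_kernel (salt_repeat, iter_off, kernel_loops); // m%05u_loop
    }
  }
}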

// loop2

if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
@ -10142,6 +10215,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)

// aux1

if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type);

if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel2p) == -1) return -1;

if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_wgs2p) == -1) return -1;

if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_local_mem_size2p) == -1) return -1;

if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1;

if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel2p, &device_param->kernel_preferred_wgs_multiple2p) == -1) return -1;
}

if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type);
@ -10208,6 +10296,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_init2, &device_param->kernel_preferred_wgs_multiple_init2) == -1) return -1;
}

// loop2 prepare

if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type);

if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program, kernel_name, &device_param->opencl_kernel_loop2p) == -1) return -1;

if (get_opencl_kernel_wgs (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1;

if (get_opencl_kernel_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1;

if (get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1;

if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_loop2p, &device_param->kernel_preferred_wgs_multiple_loop2p) == -1) return -1;
}

// loop2

if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
@ -11071,12 +11176,14 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)

device_param->cuda_function1 = NULL;
device_param->cuda_function12 = NULL;
device_param->cuda_function2p = NULL;
device_param->cuda_function2 = NULL;
device_param->cuda_function2e = NULL;
device_param->cuda_function23 = NULL;
device_param->cuda_function3 = NULL;
device_param->cuda_function4 = NULL;
device_param->cuda_function_init2 = NULL;
device_param->cuda_function_loop2p = NULL;
device_param->cuda_function_loop2 = NULL;
device_param->cuda_function_mp = NULL;
device_param->cuda_function_mp_l = NULL;
@ -11139,12 +11246,14 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)

if (device_param->opencl_kernel1) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel1);
if (device_param->opencl_kernel12) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel12);
if (device_param->opencl_kernel2p) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel2p);
if (device_param->opencl_kernel2) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel2);
if (device_param->opencl_kernel2e) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel2e);
if (device_param->opencl_kernel23) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel23);
if (device_param->opencl_kernel3) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel3);
if (device_param->opencl_kernel4) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel4);
if (device_param->opencl_kernel_init2) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_init2);
if (device_param->opencl_kernel_loop2p) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_loop2p);
if (device_param->opencl_kernel_loop2) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_loop2);
if (device_param->opencl_kernel_mp) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp);
if (device_param->opencl_kernel_mp_l) hc_clReleaseKernel (hashcat_ctx, device_param->opencl_kernel_mp_l);
@ -11205,12 +11314,14 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
device_param->opencl_d_st_esalts_buf = NULL;
device_param->opencl_kernel1 = NULL;
device_param->opencl_kernel12 = NULL;
device_param->opencl_kernel2p = NULL;
device_param->opencl_kernel2 = NULL;
device_param->opencl_kernel2e = NULL;
device_param->opencl_kernel23 = NULL;
device_param->opencl_kernel3 = NULL;
device_param->opencl_kernel4 = NULL;
device_param->opencl_kernel_init2 = NULL;
device_param->opencl_kernel_loop2p = NULL;
device_param->opencl_kernel_loop2 = NULL;
device_param->opencl_kernel_mp = NULL;
device_param->opencl_kernel_mp_l = NULL;
@ -579,6 +579,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

@ -554,6 +554,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);
@ -21,6 +21,7 @@ static const char *HASH_NAME = "bcrypt $2*$, Blowfish (Unix)";
static const u64 KERN_TYPE = 3200;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_DYNAMIC_SHARED;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";

@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";
@ -63,14 +64,14 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;

return kernel_loops_min;
}

u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;

return kernel_loops_max;
}
@ -330,6 +331,11 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->scrypt_r = hc_strtoul ((const char *) r_pos, NULL, 10);
salt->scrypt_p = hc_strtoul ((const char *) p_pos, NULL, 10);

salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;

if (salt->scrypt_N % 1024) return (PARSER_SALT_VALUE); // we set loop count to 1024 fixed
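This is the accounting that makes the split exact: salt_iter equals the scrypt N value, the module pins kernel_loops to 1024 (see module_kernel_loops_min/max above), so one smix pass costs salt_iter / 1024 loop-kernel launches, and salt_repeats = p - 1 replays that pass once per scrypt p block. A worked example with illustrative values:

/* Worked example of the iteration split, assuming the fixed 1024-iteration
 * chunk enforced by module_kernel_loops_min/max above. Values are illustrative. */
#include <stdio.h>

int main (void)
{
  unsigned int scrypt_N = 16384;             // N from the hash
  unsigned int scrypt_p = 1;                 // p from the hash
  unsigned int kernel_loops = 1024;          // fixed chunk size

  unsigned int salt_iter    = scrypt_N;      // as set in module_hash_decode
  unsigned int salt_repeats = scrypt_p - 1;

  unsigned int launches_per_pass = salt_iter / kernel_loops;           // 16384 / 1024 = 16
  unsigned int total_launches    = launches_per_pass * (salt_repeats + 1);

  printf ("loop-kernel launches: %u per pass, %u total\n", launches_per_pass, total_launches);

  return 0;
}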

// salt

const u8 *salt_pos = token.buf[4];
@ -341,8 +347,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE

memcpy (salt->salt_buf, tmp_buf, tmp_len);

salt->salt_len = tmp_len;
salt->salt_iter = 1;
salt->salt_len = tmp_len;

// digest - base64 decode

@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";
@ -52,14 +53,14 @@ static const u64 SCRYPT_P = 1;

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;

return kernel_loops_min;
}

u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;

return kernel_loops_max;
}
@ -299,11 +300,14 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
memcpy (salt_buf_ptr, salt_pos, salt_len);

salt->salt_len = salt_len;
salt->salt_iter = 1;

salt->scrypt_N = 16384;
salt->scrypt_r = 1;
salt->scrypt_p = 1;
salt->scrypt_N = SCRYPT_N;
salt->scrypt_r = SCRYPT_R;
salt->scrypt_p = SCRYPT_P;

salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;
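As a sizing footnote (standard scrypt math, not something introduced by this diff): the V scratchpad holds N blocks of 128 * r bytes per in-flight password, optionally divided by a time-memory trade-off factor such as the SCRYPT_TMTO used in the kernel. A small sketch using the values this module previously hard-coded (N = 16384, r = 1); the tmto divisor here is an assumed example:

#include <stdio.h>

int main (void)
{
  unsigned long long N = 16384;
  unsigned long long r = 1;
  unsigned long long tmto = 2; // example time-memory trade-off divisor (assumption)

  unsigned long long v_bytes = 128ULL * r * N;        // 2 MiB per in-flight password
  unsigned long long v_bytes_tmto = v_bytes / tmto;   // halved when every 2nd block is recomputed

  printf ("V: %llu bytes, with TMTO=%llu: %llu bytes\n", v_bytes, tmto, v_bytes_tmto);

  return 0;
}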

// base64 decode hash

@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE
| OPTS_TYPE_ST_HEX;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
@ -60,14 +61,14 @@ static const u64 SCRYPT_P = 1;

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;

return kernel_loops_min;
}

u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;

return kernel_loops_max;
}
@ -349,6 +350,11 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->scrypt_r = scrypt_r;
salt->scrypt_p = scrypt_p;

salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;

if (salt->scrypt_N % 1024) return (PARSER_SALT_VALUE); // we set loop count to 1024 fixed

// salt

const u8 *salt_pos = token.buf[4];
@ -367,8 +373,6 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
ethereum_scrypt->salt_buf[6] = salt->salt_buf[6];
ethereum_scrypt->salt_buf[7] = salt->salt_buf[7];

salt->salt_iter = 1;

// ciphertext

const u8 *ciphertext_pos = token.buf[5];
@ -290,6 +290,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

@ -312,6 +312,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

@ -600,6 +600,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);

@ -601,6 +601,7 @@ bool module_potfile_custom_check (MAYBE_UNUSED const hashconfig_t *hashconfig, M
1, // digests_cnt
0, // digests_offset
0, // combs_mode
0, // salt_repeat
0, // pws_pos
1 // gid_max
);
@ -25,6 +25,7 @@ static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_BE
| OPTS_TYPE_PT_UTF16BE
| OPTS_TYPE_MP_MULTI_DISABLE
| OPTS_TYPE_NATIVE_THREADS
| OPTS_TYPE_LOOP_PREPARE
| OPTS_TYPE_SELF_TEST_DISABLE;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat";
@ -64,14 +65,14 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE

u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 1;
const u32 kernel_loops_min = 1024;

return kernel_loops_min;
}

u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1;
const u32 kernel_loops_max = 1024;

return kernel_loops_max;
}
@ -320,6 +321,9 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->scrypt_r = SCRYPT_R;
salt->scrypt_p = SCRYPT_P;

salt->salt_iter = salt->scrypt_N;
salt->salt_repeats = salt->scrypt_p - 1;

// version

const u8 *version_pos = token.buf[1];
@ -353,8 +357,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
salt->salt_buf[10] = hex_to_u32 (b2_pos + 16);
salt->salt_buf[11] = hex_to_u32 (b2_pos + 24);

salt->salt_len = 48;
salt->salt_iter = 1;
salt->salt_len = 48;

// fake digest: