Merge branch 'master' into snmpv3_sha512

pull/2905/head
Jens Steube 3 years ago committed by GitHub
commit 63a0c5a1e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1483,7 +1483,7 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c)
return r;
}
#if HAS_VPERM
#if HAS_VPERM == 1
DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c)
{
u32x r = 0;
@ -1533,7 +1533,7 @@ DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const int c)
}
#endif
#if HAS_VADD3
#if HAS_VADD3 == 1
DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
{
/*
@ -2066,6 +2066,12 @@ DECLSPEC int hc_enc_next (hc_enc_t *hc_enc, const u32 *src_buf, const int src_le
int src_pos = hc_enc->pos;
#if VENDOR_ID == 8
// Work around segmentation fault in Intel JiT
// Tested with 2021.12.6.0.19_160000
volatile
#endif
int dst_pos = hc_enc->clen;
dst_buf[0] = hc_enc->cbuf;
@ -2197,6 +2203,12 @@ DECLSPEC int hc_enc_next_global (hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf,
int src_pos = hc_enc->pos;
#if VENDOR_ID == 8
// Work around segmentation fault in Intel JiT
// Tested with 2021.12.6.0.19_160000
volatile
#endif
int dst_pos = hc_enc->clen;
dst_buf[0] = hc_enc->cbuf;
@ -2300,6 +2312,8 @@ DECLSPEC int hc_enc_next_global (hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf,
if ((dst_pos + 2) == dst_sz)
{
// this section seems to break intel opencl runtime but is unknown why
dst_ptr[dst_pos++] = (a >> 0) & 0xff;
dst_ptr[dst_pos++] = (a >> 8) & 0xff;
@ -2767,7 +2781,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x3727);
out1[0] = hc_byte_perm (in[0], 0, 0x1707);
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM == 1
out2[3] = hc_byte_perm (in[3], 0, 0x03070207);
out2[2] = hc_byte_perm (in[3], 0, 0x01070007);
@ -2805,7 +2819,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x1707);
out1[0] = hc_byte_perm (in[0], 0, 0x3727);
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM == 1
out2[3] = hc_byte_perm (in[3], 0, 0x01070007);
out2[2] = hc_byte_perm (in[3], 0, 0x03070207);
@ -2843,7 +2857,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x7372);
out1[0] = hc_byte_perm (in[0], 0, 0x7170);
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM == 1
out2[3] = hc_byte_perm (in[3], 0, 0x07030702);
out2[2] = hc_byte_perm (in[3], 0, 0x07010700);
@ -2881,7 +2895,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x7170);
out1[0] = hc_byte_perm (in[0], 0, 0x7372);
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM == 1
out2[3] = hc_byte_perm (in[3], 0, 0x07010700);
out2[2] = hc_byte_perm (in[3], 0, 0x07030702);
@ -2915,7 +2929,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out)
out[2] = hc_byte_perm (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm (in2[2], in2[3], 0x4602);
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM == 1
out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002);
@ -2945,7 +2959,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out)
out[2] = hc_byte_perm (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm (in2[2], in2[3], 0x6420);
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM == 1
out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200);
@ -36533,7 +36547,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2)
out1[1] = hc_byte_perm_S (in[0], 0, 0x3727);
out1[0] = hc_byte_perm_S (in[0], 0, 0x1707);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207);
out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007);
@ -36571,7 +36585,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2)
out1[1] = hc_byte_perm_S (in[0], 0, 0x7372);
out1[0] = hc_byte_perm_S (in[0], 0, 0x7170);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702);
out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700);
@ -36605,7 +36619,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
@ -36635,7 +36649,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);

@ -124,7 +124,9 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b)
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
// HIP doesnt support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
#elif 0
__asm__ __volatile__
(
"V_SUB_U32 %0, %9, %17;"
@ -176,7 +178,9 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b)
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1
// HIP doesnt support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
#elif 0
__asm__ __volatile__
(
"V_ADD_U32 %0, %9, %17;"

@ -24,7 +24,7 @@ DECLSPEC u64 blake2b_rot16_S (const u64 a)
return out.v64;
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
vconv64_t in;
@ -98,7 +98,7 @@ DECLSPEC u64 blake2b_rot24_S (const u64 a)
return out.v64;
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
vconv64_t in;
@ -113,7 +113,7 @@ DECLSPEC u64 blake2b_rot24_S (const u64 a)
#else
return hc_rotr64_S (a, 16);
return hc_rotr64_S (a, 24);
#endif
}

@ -193,46 +193,35 @@ DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return __atomic_fetch_sub (p, val, __ATOMIC_RELAXED);
return atomicSub (p, val);
}
DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
{
volatile const u32 val = 1;
return __atomic_fetch_add (p, val, __ATOMIC_RELAXED);
return atomicAdd (p, val);
}
DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
{
return __atomic_fetch_or (p, val, __ATOMIC_RELAXED);
}
extern "C" __device__ __attribute__((pure)) double __ocml_log2_f64(double);
DECLSPEC double log2 (double x)
{
return __ocml_log2_f64 (x);
return atomicOr (p, val);
}
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
DECLSPEC size_t get_global_id (const u32 dimindx)
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
{
return (__ockl_get_group_id (dimindx) * __ockl_get_local_size (dimindx)) + __ockl_get_local_id (dimindx);
return (blockIdx.x * blockDim.x) + threadIdx.x;
}
DECLSPEC size_t get_local_id (const u32 dimindx)
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
{
return __ockl_get_local_id (dimindx);
return threadIdx.x;
}
DECLSPEC size_t get_local_size (const u32 dimindx)
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
{
return __ockl_get_local_size (dimindx);
// verify
return blockDim.x;
}
DECLSPEC u32x rotl32 (const u32x a, const int n)
@ -308,11 +297,8 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n)
return out.v64;
}
extern "C" __device__ int printf(const char *fmt, ...);
//int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
#define FIXED_THREAD_COUNT(n) __attribute__((amdgpu_flat_work_group_size (1, (n))))
#define SYNC_THREADS() __builtin_amdgcn_s_barrier ()
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
#define SYNC_THREADS() __syncthreads ()
#endif
#ifdef IS_OPENCL

@ -269,6 +269,35 @@ DECLSPEC int mangle_toggle_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p
return (len);
}
DECLSPEC int mangle_toggle_at_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len)
{
if (len >= RP_PASSWORD_SIZE) return (len);
u8 occurence = 0;
u32 rem = 0;
for (int i = 0, idx = 0; i < len; i += 4, idx += 1)
{
const u32 t = buf[idx];
buf[idx] = t | generate_cmask (t);
u32 out = rem;
rem = 0;
if (((t >> 0) & 0xff) == p1) { if (occurence == p0) out = 0x0000ff00; occurence++; }
if (((t >> 8) & 0xff) == p1) { if (occurence == p0) out = 0x00ff0000; occurence++; }
if (((t >> 16) & 0xff) == p1) { if (occurence == p0) out = 0xff000000; occurence++; }
if (((t >> 24) & 0xff) == p1) { if (occurence == p0) rem = 0x000000ff; occurence++; }
buf[idx] = t ^ (generate_cmask (t) & out);
}
return (len);
}
DECLSPEC int mangle_reverse (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len)
{
for (int l = 0; l < len / 2; l++)
@ -725,6 +754,7 @@ DECLSPEC int apply_rule (const u32 name, MAYBE_UNUSED const u8 p0, MAYBE_UNUSED
case RULE_OP_MANGLE_UREST_LFIRST: out_len = mangle_urest_lfirst (p0, p1, buf, out_len); break;
case RULE_OP_MANGLE_TREST: out_len = mangle_trest (p0, p1, buf, out_len); break;
case RULE_OP_MANGLE_TOGGLE_AT: out_len = mangle_toggle_at (p0, p1, buf, out_len); break;
case RULE_OP_MANGLE_TOGGLE_AT_SEP: out_len = mangle_toggle_at_sep (p0, p1, buf, out_len); break;
case RULE_OP_MANGLE_REVERSE: out_len = mangle_reverse (p0, p1, buf, out_len); break;
case RULE_OP_MANGLE_DUPEWORD: out_len = mangle_dupeword (p0, p1, buf, out_len); break;
case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = mangle_dupeword_times (p0, p1, (u8 *) buf, out_len); break;

@ -21,6 +21,7 @@
#define RULE_OP_MANGLE_UREST_LFIRST 'C'
#define RULE_OP_MANGLE_TREST 't'
#define RULE_OP_MANGLE_TOGGLE_AT 'T'
#define RULE_OP_MANGLE_TOGGLE_AT_SEP '3'
#define RULE_OP_MANGLE_REVERSE 'r'
#define RULE_OP_MANGLE_DUPEWORD 'd'
#define RULE_OP_MANGLE_DUPEWORD_TIMES 'p'
@ -83,6 +84,7 @@ DECLSPEC int mangle_urest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u
DECLSPEC int mangle_urest_lfirst (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
DECLSPEC int mangle_trest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
DECLSPEC int mangle_toggle_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
DECLSPEC int mangle_toggle_at_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
DECLSPEC int mangle_reverse (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
DECLSPEC int mangle_dupeword (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
DECLSPEC int mangle_dupeword_times (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len);

@ -1163,6 +1163,82 @@ DECLSPEC u32 rule_op_mangle_toggle_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED c
return (in_len);
}
DECLSPEC u32 rule_op_mangle_toggle_at_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len)
{
if (in_len == 0) return in_len;
u32 r0 = search_on_register (buf0[0], p1);
u32 r1 = search_on_register (buf0[1], p1);
u32 r2 = search_on_register (buf0[2], p1);
u32 r3 = search_on_register (buf0[3], p1);
u32 r4 = search_on_register (buf1[0], p1);
u32 r5 = search_on_register (buf1[1], p1);
u32 r6 = search_on_register (buf1[2], p1);
u32 r7 = search_on_register (buf1[3], p1);
const u32 rn = (r0 << 0)
| (r1 << 4)
| (r2 << 8)
| (r3 << 12)
| (r4 << 16)
| (r5 << 20)
| (r6 << 24)
| (r7 << 28);
if (rn == 0) return in_len;
u32 occurence = 0;
u32 ro = 0;
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 32; i++)
{
if ((rn >> i) & 1)
{
if (occurence == p0)
{
ro = 1 << i;
break;
}
occurence++;
}
}
r0 = (ro >> 0) & 15;
r1 = (ro >> 4) & 15;
r2 = (ro >> 8) & 15;
r3 = (ro >> 12) & 15;
r4 = (ro >> 16) & 15;
r5 = (ro >> 20) & 15;
r6 = (ro >> 24) & 15;
r7 = (ro >> 28) & 15;
r0 <<= 1;
r1 <<= 1; r1 |= r0 >> 4;
r2 <<= 1; r2 |= r1 >> 4;
r3 <<= 1; r3 |= r2 >> 4;
r4 <<= 1; r4 |= r3 >> 4;
r5 <<= 1; r5 |= r4 >> 4;
r6 <<= 1; r6 |= r5 >> 4;
r7 <<= 1; r7 |= r6 >> 4;
buf0[0] = toggle_on_register (buf0[0], r0);
buf0[1] = toggle_on_register (buf0[1], r1);
buf0[2] = toggle_on_register (buf0[2], r2);
buf0[3] = toggle_on_register (buf0[3], r3);
buf1[0] = toggle_on_register (buf1[0], r4);
buf1[1] = toggle_on_register (buf1[1], r5);
buf1[2] = toggle_on_register (buf1[2], r6);
buf1[3] = toggle_on_register (buf1[3], r7);
return in_len;
}
DECLSPEC u32 rule_op_mangle_reverse (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len)
{
reverse_block_optimized (buf0, buf1, buf0, buf1, in_len);
@ -2285,6 +2361,7 @@ DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, u
case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break;
case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break;
case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break;
case RULE_OP_MANGLE_TOGGLE_AT_SEP: out_len = rule_op_mangle_toggle_at_sep (p0, p1, buf0, buf1, out_len); break;
case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break;
case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break;
case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break;

@ -21,6 +21,7 @@
#define RULE_OP_MANGLE_UREST_LFIRST 'C'
#define RULE_OP_MANGLE_TREST 't'
#define RULE_OP_MANGLE_TOGGLE_AT 'T'
#define RULE_OP_MANGLE_TOGGLE_AT_SEP '3'
#define RULE_OP_MANGLE_REVERSE 'r'
#define RULE_OP_MANGLE_DUPEWORD 'd'
#define RULE_OP_MANGLE_DUPEWORD_TIMES 'p'
@ -85,6 +86,7 @@ DECLSPEC u32 rule_op_mangle_lrest_ufirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSE
DECLSPEC u32 rule_op_mangle_urest_lfirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
DECLSPEC u32 rule_op_mangle_trest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
DECLSPEC u32 rule_op_mangle_toggle_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
DECLSPEC u32 rule_op_mangle_toggle_at_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
DECLSPEC u32 rule_op_mangle_reverse (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
DECLSPEC u32 rule_op_mangle_dupeword (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
DECLSPEC u32 rule_op_mangle_dupeword_times (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);

@ -21,96 +21,23 @@
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long long ulong;
typedef unsigned long ulong;
typedef unsigned long long ullong;
#endif
#ifdef IS_HIP
// https://github.com/llvm-mirror/clang/blob/master/lib/Headers/opencl-c-base.h
// built-in scalar data types:
/**
* An unsigned 8-bit integer.
*/
typedef unsigned char uchar;
/**
* An unsigned 16-bit integer.
*/
typedef unsigned short ushort;
/**
* An unsigned 32-bit integer.
*/
typedef unsigned int uint;
/**
* An unsigned 64-bit integer.
*/
typedef unsigned long ulong;
/**
* The unsigned integer type of the result of the sizeof operator. This
* is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS
* defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if
* CL_DEVICE_ADDRESS_BITS is 64-bits.
*/
typedef __SIZE_TYPE__ size_t;
// built-in vector data types:
typedef char char2 __attribute__((ext_vector_type(2)));
typedef char char3 __attribute__((ext_vector_type(3)));
typedef char char4 __attribute__((ext_vector_type(4)));
typedef char char8 __attribute__((ext_vector_type(8)));
typedef char char16 __attribute__((ext_vector_type(16)));
typedef uchar uchar2 __attribute__((ext_vector_type(2)));
typedef uchar uchar3 __attribute__((ext_vector_type(3)));
typedef uchar uchar4 __attribute__((ext_vector_type(4)));
typedef uchar uchar8 __attribute__((ext_vector_type(8)));
typedef uchar uchar16 __attribute__((ext_vector_type(16)));
typedef short short2 __attribute__((ext_vector_type(2)));
typedef short short3 __attribute__((ext_vector_type(3)));
typedef short short4 __attribute__((ext_vector_type(4)));
typedef short short8 __attribute__((ext_vector_type(8)));
typedef short short16 __attribute__((ext_vector_type(16)));
typedef ushort ushort2 __attribute__((ext_vector_type(2)));
typedef ushort ushort3 __attribute__((ext_vector_type(3)));
typedef ushort ushort4 __attribute__((ext_vector_type(4)));
typedef ushort ushort8 __attribute__((ext_vector_type(8)));
typedef ushort ushort16 __attribute__((ext_vector_type(16)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef int int8 __attribute__((ext_vector_type(8)));
typedef int int16 __attribute__((ext_vector_type(16)));
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint3 __attribute__((ext_vector_type(3)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
typedef uint uint8 __attribute__((ext_vector_type(8)));
typedef uint uint16 __attribute__((ext_vector_type(16)));
typedef long long2 __attribute__((ext_vector_type(2)));
typedef long long3 __attribute__((ext_vector_type(3)));
typedef long long4 __attribute__((ext_vector_type(4)));
typedef long long8 __attribute__((ext_vector_type(8)));
typedef long long16 __attribute__((ext_vector_type(16)));
typedef ulong ulong2 __attribute__((ext_vector_type(2)));
typedef ulong ulong3 __attribute__((ext_vector_type(3)));
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
typedef ulong ulong8 __attribute__((ext_vector_type(8)));
typedef ulong ulong16 __attribute__((ext_vector_type(16)));
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float8 __attribute__((ext_vector_type(8)));
typedef float float16 __attribute__((ext_vector_type(16)));
#ifdef IS_OPENCL
typedef ulong ullong;
typedef ulong2 ullong2;
typedef ulong4 ullong4;
typedef ulong8 ullong8;
typedef ulong16 ullong16;
#endif
#ifdef KERNEL_STATIC
typedef uchar u8;
typedef ushort u16;
typedef uint u32;
typedef ulong u64;
typedef ullong u64;
#else
typedef uint8_t u8;
typedef uint16_t u16;
@ -150,7 +77,7 @@ typedef u64 u64x;
#define make_u64x (u64)
#else
#if defined IS_CUDA
#if defined IS_CUDA || defined IS_HIP
#if VECT_SIZE == 2
@ -910,7 +837,7 @@ typedef __device_builtin__ struct u64x u64x;
typedef VTYPE(uchar, VECT_SIZE) u8x;
typedef VTYPE(ushort, VECT_SIZE) u16x;
typedef VTYPE(uint, VECT_SIZE) u32x;
typedef VTYPE(ulong, VECT_SIZE) u64x;
typedef VTYPE(ullong, VECT_SIZE) u64x;
#define make_u8x (u8x)
#define make_u16x (u16x)

@ -16,7 +16,6 @@
#define IS_OPENCL
#endif
#if defined IS_NATIVE
#define CONSTANT_VK
#define CONSTANT_AS
@ -32,10 +31,6 @@
#define LOCAL_AS
#define KERNEL_FQ extern "C" __global__
#elif defined IS_HIP
#define __device__ __attribute__((device))
#define __constant__ __attribute__((constant))
#define __shared__ __attribute__((shared))
#define __global__ __attribute__((global))
#define CONSTANT_VK __constant__
#define CONSTANT_AS
#define GLOBAL_AS

@ -73,18 +73,16 @@ enum{
MZ_VERSION_ERROR = -6,
MZ_PARAM_ERROR = -10000
};
typedef unsigned long mz_ulong;
typedef ullong mz_ulong;
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
typedef unsigned char Byte;
typedef unsigned int uInt;
typedef mz_ulong uLong;
typedef Byte Bytef;
typedef uInt uIntf;
typedef char charf;
typedef int intf;
typedef void *voidpf;
typedef uLong uLongf;
typedef void *voidp;
typedef void *const voidpc;
#define Z_NULL 0
@ -204,10 +202,6 @@ DECLSPEC void *memset(u8 *s, int c, u32 len){
#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MZ_DEFAULT_WINDOW_BITS 15
#define TINFL_LZ_DICT_SIZE 32768
#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
#define TINFL_MEMCPY_G(d, s, l, p) memcpy_g(d, s, l, p)
#define TINFL_MEMSET(p, c, l) memset(p, c, (u32)l)
#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
// hashcat-patched/hashcat-specific:
#ifdef CRC32_IN_INFLATE
@ -583,7 +577,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS);
}
n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter);
TINFL_MEMCPY_G(pOut_buf_cur, pIn_buf_cur, n, pStream);
memcpy_g(pOut_buf_cur, pIn_buf_cur, n, pStream);
pIn_buf_cur += n;
pOut_buf_cur += n;
counter -= (mz_uint)n;
@ -601,7 +595,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
mz_uint i;
r->m_table_sizes[0] = 288;
r->m_table_sizes[1] = 32;
TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
memset(r->m_tables[1].m_code_size, 5, 32);
for (i = 0; i <= 143; ++i)
*p++ = 8;
for (; i <= 255; ++i)
@ -618,7 +612,8 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]);
r->m_table_sizes[counter] += s_min_table_sizes[counter];
}
MZ_CLEAR_OBJ(r->m_tables[2].m_code_size);
memset(r->m_tables[2].m_code_size, 0, TINFL_MAX_HUFF_SYMBOLS_0);
for (counter = 0; counter < r->m_table_sizes[2]; counter++)
{
mz_uint s;
@ -633,9 +628,11 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
tinfl_huff_table *pTable;
mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16];
pTable = &r->m_tables[r->m_type];
MZ_CLEAR_OBJ(total_syms);
MZ_CLEAR_OBJ(pTable->m_look_up);
MZ_CLEAR_OBJ(pTable->m_tree);
memset((u8 *) total_syms, 0, 64);
memset((u8 *) pTable->m_look_up, 0, TINFL_FAST_LOOKUP_SIZE * 2);
memset((u8 *) pTable->m_tree, 0, TINFL_MAX_HUFF_SYMBOLS_0 * 2 * 2);
for (i = 0; i < r->m_table_sizes[r->m_type]; ++i)
total_syms[pTable->m_code_size[i]]++;
used_syms = 0, total = 0;
@ -707,15 +704,18 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const
num_extra = "\02\03\07"[dist - 16];
TINFL_GET_BITS(18, s, num_extra);
s += "\03\03\013"[dist - 16];
TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
memset(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
counter += s;
}
if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter)
{
TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
}
TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]);
TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
memcpy(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]);
memcpy(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
}
}
for (;;)

@ -24,7 +24,7 @@ typedef struct
} scrypt_tmp_t;
#if defined IS_CUDA
#if defined IS_CUDA || defined IS_HIP
inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
@ -41,15 +41,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n)
#endif
#if defined IS_HIP
inline __device__ uint4 rotate (const uint4 a, const int n)
{
return ((a << n) | ((a >> (32 - n))));
}
#endif
DECLSPEC uint4 hc_swap32_4 (uint4 v)
{
return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u));
@ -66,7 +57,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
#if defined IS_CUDA
#if defined IS_CUDA || defined IS_HIP
#define SALSA20_2R() \
{ \

@ -804,7 +804,7 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t))
* base
*/
const u32 pw_len = pws[gid].pw_len & 255;
const u32 pw_len = pws[gid].pw_len;
const u32 salt_len = 8;
@ -970,7 +970,7 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t))
* base
*/
const u32 pw_len = pws[gid].pw_len & 255;
const u32 pw_len = pws[gid].pw_len;
const u32 salt_len = 8;

@ -37,7 +37,7 @@ typedef struct zip2
u32 verify_bytes;
u32 compress_length;
u32 data_len;
u32 data_buf[0x4000000];
u32 data_buf[0x200000];
u32 auth_len;
u32 auth_buf[4];

@ -31,7 +31,7 @@ typedef struct ethereum_scrypt
} ethereum_scrypt_t;
#if defined IS_CUDA
#if defined IS_CUDA || defined IS_HIP
inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
@ -48,15 +48,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n)
#endif
#if defined IS_HIP
inline __device__ uint4 rotate (const uint4 a, const int n)
{
return ((a << n) | ((a >> (32 - n))));
}
#endif
DECLSPEC uint4 hc_swap32_4 (uint4 v)
{
return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u));
@ -73,7 +64,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
#if defined IS_CUDA
#if defined IS_CUDA || defined IS_HIP
#define SALSA20_2R() \
{ \

@ -72,7 +72,7 @@ DECLSPEC int is_valid_bitcoinj (const u32 *w)
return 1;
}
#if defined IS_CUDA
#if defined IS_CUDA || defined IS_HIP
inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
@ -89,15 +89,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n)
#endif
#if defined IS_HIP
inline __device__ uint4 rotate (const uint4 a, const int n)
{
return ((a << n) | ((a >> (32 - n))));
}
#endif
DECLSPEC uint4 hc_swap32_4 (uint4 v)
{
return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u));
@ -114,7 +105,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
#if defined IS_CUDA
#if defined IS_CUDA || defined IS_HIP
#define SALSA20_2R() \
{ \

@ -912,7 +912,7 @@ KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t))
* base
*/
const u32 pw_len = pws[gid].pw_len & 255;
const u32 pw_len = pws[gid].pw_len;
const u32 salt_len = 8;
@ -1086,7 +1086,7 @@ KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t))
* base
*/
const u32 pw_len = pws[gid].pw_len & 255;
const u32 pw_len = pws[gid].pw_len;
const u32 salt_len = 8;

@ -823,7 +823,7 @@ KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t,
* base
*/
const u32 pw_len = pws[gid].pw_len & 255;
const u32 pw_len = pws[gid].pw_len;
const u32 salt_len = 8;
@ -990,7 +990,7 @@ KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_
* base
*/
const u32 pw_len = pws[gid].pw_len & 255;
const u32 pw_len = pws[gid].pw_len;
const u32 salt_len = 8;

@ -0,0 +1,590 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#include "inc_hash_sha1.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#define SNMPV3_SALT_MAX 1500
#define SNMPV3_ENGINEID_MAX 34
#define SNMPV3_MSG_AUTH_PARAMS_LEN 12
#define SNMPV3_ROUNDS 1048576
#define SNMPV3_MAX_PW_LENGTH 64
#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
#define SNMPV3_HASH_ELEMS_MD5 4
#define SNMPV3_HASH_ELEMS_SHA1 8 // 8 = aligned 5
#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9
typedef struct hmac_md5_tmp
{
u32 tmp_md5[SNMPV3_TMP_ELEMS];
u32 tmp_sha1[SNMPV3_TMP_ELEMS];
u32 h_md5[SNMPV3_HASH_ELEMS_MD5];
u32 h_sha1[SNMPV3_HASH_ELEMS_SHA1];
} hmac_md5_tmp_t;
typedef struct snmpv3
{
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
u32 salt_len;
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
u32 engineID_len;
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
} snmpv3_t;
KERNEL_FQ void m25000_init (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32 w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
u8 *src_ptr = (u8 *) w;
// password 64 times, also swapped
u32 dst_buf[16];
u8 *dst_ptr = (u8 *) dst_buf;
int tmp_idx = 0;
for (int i = 0; i < 64; i++)
{
for (int j = 0; j < pw_len; j++)
{
const int dst_idx = tmp_idx & 63;
dst_ptr[dst_idx] = src_ptr[j];
// write to global memory every time 64 byte are written into cache
if (dst_idx == 63)
{
const int tmp_idx4 = (tmp_idx - 63) / 4;
// md5
tmps[gid].tmp_md5[tmp_idx4 + 0] = dst_buf[ 0];
tmps[gid].tmp_md5[tmp_idx4 + 1] = dst_buf[ 1];
tmps[gid].tmp_md5[tmp_idx4 + 2] = dst_buf[ 2];
tmps[gid].tmp_md5[tmp_idx4 + 3] = dst_buf[ 3];
tmps[gid].tmp_md5[tmp_idx4 + 4] = dst_buf[ 4];
tmps[gid].tmp_md5[tmp_idx4 + 5] = dst_buf[ 5];
tmps[gid].tmp_md5[tmp_idx4 + 6] = dst_buf[ 6];
tmps[gid].tmp_md5[tmp_idx4 + 7] = dst_buf[ 7];
tmps[gid].tmp_md5[tmp_idx4 + 8] = dst_buf[ 8];
tmps[gid].tmp_md5[tmp_idx4 + 9] = dst_buf[ 9];
tmps[gid].tmp_md5[tmp_idx4 + 10] = dst_buf[10];
tmps[gid].tmp_md5[tmp_idx4 + 11] = dst_buf[11];
tmps[gid].tmp_md5[tmp_idx4 + 12] = dst_buf[12];
tmps[gid].tmp_md5[tmp_idx4 + 13] = dst_buf[13];
tmps[gid].tmp_md5[tmp_idx4 + 14] = dst_buf[14];
tmps[gid].tmp_md5[tmp_idx4 + 15] = dst_buf[15];
// sha1
tmps[gid].tmp_sha1[tmp_idx4 + 0] = hc_swap32_S (dst_buf[ 0]);
tmps[gid].tmp_sha1[tmp_idx4 + 1] = hc_swap32_S (dst_buf[ 1]);
tmps[gid].tmp_sha1[tmp_idx4 + 2] = hc_swap32_S (dst_buf[ 2]);
tmps[gid].tmp_sha1[tmp_idx4 + 3] = hc_swap32_S (dst_buf[ 3]);
tmps[gid].tmp_sha1[tmp_idx4 + 4] = hc_swap32_S (dst_buf[ 4]);
tmps[gid].tmp_sha1[tmp_idx4 + 5] = hc_swap32_S (dst_buf[ 5]);
tmps[gid].tmp_sha1[tmp_idx4 + 6] = hc_swap32_S (dst_buf[ 6]);
tmps[gid].tmp_sha1[tmp_idx4 + 7] = hc_swap32_S (dst_buf[ 7]);
tmps[gid].tmp_sha1[tmp_idx4 + 8] = hc_swap32_S (dst_buf[ 8]);
tmps[gid].tmp_sha1[tmp_idx4 + 9] = hc_swap32_S (dst_buf[ 9]);
tmps[gid].tmp_sha1[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
tmps[gid].tmp_sha1[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
tmps[gid].tmp_sha1[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
tmps[gid].tmp_sha1[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
tmps[gid].tmp_sha1[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
tmps[gid].tmp_sha1[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
}
tmp_idx++;
}
}
// hash md5
tmps[gid].h_md5[0] = MD5M_A;
tmps[gid].h_md5[1] = MD5M_B;
tmps[gid].h_md5[2] = MD5M_C;
tmps[gid].h_md5[3] = MD5M_D;
// hash sha1
tmps[gid].h_sha1[0] = SHA1M_A;
tmps[gid].h_sha1[1] = SHA1M_B;
tmps[gid].h_sha1[2] = SHA1M_C;
tmps[gid].h_sha1[3] = SHA1M_D;
tmps[gid].h_sha1[4] = SHA1M_E;
}
KERNEL_FQ void m25000_loop (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 h_md5[4];
h_md5[0] = tmps[gid].h_md5[0];
h_md5[1] = tmps[gid].h_md5[1];
h_md5[2] = tmps[gid].h_md5[2];
h_md5[3] = tmps[gid].h_md5[3];
u32 h_sha1[5];
h_sha1[0] = tmps[gid].h_sha1[0];
h_sha1[1] = tmps[gid].h_sha1[1];
h_sha1[2] = tmps[gid].h_sha1[2];
h_sha1[3] = tmps[gid].h_sha1[3];
h_sha1[4] = tmps[gid].h_sha1[4];
const u32 pw_len = pws[gid].pw_len;
const int pw_len64 = pw_len * 64;
#define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
// for pw length > 64 we use global memory reads
if (pw_len < 64)
{
u32 tmp_shared[SNMPV3_TMP_ELEMS_OPT];
// md5
for (int i = 0; i < pw_len64 / 4; i++)
{
tmp_shared[i] = tmps[gid].tmp_md5[i];
}
for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
{
const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = tmp_shared[idx + 0];
w0[1] = tmp_shared[idx + 1];
w0[2] = tmp_shared[idx + 2];
w0[3] = tmp_shared[idx + 3];
w1[0] = tmp_shared[idx + 4];
w1[1] = tmp_shared[idx + 5];
w1[2] = tmp_shared[idx + 6];
w1[3] = tmp_shared[idx + 7];
w2[0] = tmp_shared[idx + 8];
w2[1] = tmp_shared[idx + 9];
w2[2] = tmp_shared[idx + 10];
w2[3] = tmp_shared[idx + 11];
w3[0] = tmp_shared[idx + 12];
w3[1] = tmp_shared[idx + 13];
w3[2] = tmp_shared[idx + 14];
w3[3] = tmp_shared[idx + 15];
md5_transform (w0, w1, w2, w3, h_md5);
}
// sha1
for (int i = 0; i < pw_len64 / 4; i++)
{
tmp_shared[i] = tmps[gid].tmp_sha1[i];
}
for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
{
const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = tmp_shared[idx + 0];
w0[1] = tmp_shared[idx + 1];
w0[2] = tmp_shared[idx + 2];
w0[3] = tmp_shared[idx + 3];
w1[0] = tmp_shared[idx + 4];
w1[1] = tmp_shared[idx + 5];
w1[2] = tmp_shared[idx + 6];
w1[3] = tmp_shared[idx + 7];
w2[0] = tmp_shared[idx + 8];
w2[1] = tmp_shared[idx + 9];
w2[2] = tmp_shared[idx + 10];
w2[3] = tmp_shared[idx + 11];
w3[0] = tmp_shared[idx + 12];
w3[1] = tmp_shared[idx + 13];
w3[2] = tmp_shared[idx + 14];
w3[3] = tmp_shared[idx + 15];
sha1_transform (w0, w1, w2, w3, h_sha1);
}
}
else
{
for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
{
const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
// md5
w0[0] = tmps[gid].tmp_md5[idx + 0];
w0[1] = tmps[gid].tmp_md5[idx + 1];
w0[2] = tmps[gid].tmp_md5[idx + 2];
w0[3] = tmps[gid].tmp_md5[idx + 3];
w1[0] = tmps[gid].tmp_md5[idx + 4];
w1[1] = tmps[gid].tmp_md5[idx + 5];
w1[2] = tmps[gid].tmp_md5[idx + 6];
w1[3] = tmps[gid].tmp_md5[idx + 7];
w2[0] = tmps[gid].tmp_md5[idx + 8];
w2[1] = tmps[gid].tmp_md5[idx + 9];
w2[2] = tmps[gid].tmp_md5[idx + 10];
w2[3] = tmps[gid].tmp_md5[idx + 11];
w3[0] = tmps[gid].tmp_md5[idx + 12];
w3[1] = tmps[gid].tmp_md5[idx + 13];
w3[2] = tmps[gid].tmp_md5[idx + 14];
w3[3] = tmps[gid].tmp_md5[idx + 15];
md5_transform (w0, w1, w2, w3, h_md5);
// sha1
w0[0] = tmps[gid].tmp_sha1[idx + 0];
w0[1] = tmps[gid].tmp_sha1[idx + 1];
w0[2] = tmps[gid].tmp_sha1[idx + 2];
w0[3] = tmps[gid].tmp_sha1[idx + 3];
w1[0] = tmps[gid].tmp_sha1[idx + 4];
w1[1] = tmps[gid].tmp_sha1[idx + 5];
w1[2] = tmps[gid].tmp_sha1[idx + 6];
w1[3] = tmps[gid].tmp_sha1[idx + 7];
w2[0] = tmps[gid].tmp_sha1[idx + 8];
w2[1] = tmps[gid].tmp_sha1[idx + 9];
w2[2] = tmps[gid].tmp_sha1[idx + 10];
w2[3] = tmps[gid].tmp_sha1[idx + 11];
w3[0] = tmps[gid].tmp_sha1[idx + 12];
w3[1] = tmps[gid].tmp_sha1[idx + 13];
w3[2] = tmps[gid].tmp_sha1[idx + 14];
w3[3] = tmps[gid].tmp_sha1[idx + 15];
sha1_transform (w0, w1, w2, w3, h_sha1);
}
}
tmps[gid].h_md5[0] = h_md5[0];
tmps[gid].h_md5[1] = h_md5[1];
tmps[gid].h_md5[2] = h_md5[2];
tmps[gid].h_md5[3] = h_md5[3];
tmps[gid].h_sha1[0] = h_sha1[0];
tmps[gid].h_sha1[1] = h_sha1[1];
tmps[gid].h_sha1[2] = h_sha1[2];
tmps[gid].h_sha1[3] = h_sha1[3];
tmps[gid].h_sha1[4] = h_sha1[4];
}
KERNEL_FQ void m25000_comp (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
// md5
w0[0] = 0x00000080;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 1048576 * 8;
w3[3] = 0;
u32 h_md5[4];
h_md5[0] = tmps[gid].h_md5[0];
h_md5[1] = tmps[gid].h_md5[1];
h_md5[2] = tmps[gid].h_md5[2];
h_md5[3] = tmps[gid].h_md5[3];
md5_transform (w0, w1, w2, w3, h_md5);
// sha1
w0[0] = 0x80000000;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 1048576 * 8;
u32 h_sha1[5];
h_sha1[0] = tmps[gid].h_sha1[0];
h_sha1[1] = tmps[gid].h_sha1[1];
h_sha1[2] = tmps[gid].h_sha1[2];
h_sha1[3] = tmps[gid].h_sha1[3];
h_sha1[4] = tmps[gid].h_sha1[4];
sha1_transform (w0, w1, w2, w3, h_sha1);
md5_ctx_t md5_ctx;
sha1_ctx_t sha1_ctx;
md5_init (&md5_ctx);
sha1_init (&sha1_ctx);
u32 w[16];
// md5
w[ 0] = h_md5[0];
w[ 1] = h_md5[1];
w[ 2] = h_md5[2];
w[ 3] = h_md5[3];
w[ 4] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
md5_update (&md5_ctx, w, 16);
// sha1
w[ 0] = h_sha1[0];
w[ 1] = h_sha1[1];
w[ 2] = h_sha1[2];
w[ 3] = h_sha1[3];
w[ 4] = h_sha1[4];
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha1_update (&sha1_ctx, w, 20);
// engineID
md5_update_global (&md5_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
sha1_update_global_swap (&sha1_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
// md5
w[ 0] = h_md5[0];
w[ 1] = h_md5[1];
w[ 2] = h_md5[2];
w[ 3] = h_md5[3];
w[ 4] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
md5_update (&md5_ctx, w, 16);
// sha1
w[ 0] = h_sha1[0];
w[ 1] = h_sha1[1];
w[ 2] = h_sha1[2];
w[ 3] = h_sha1[3];
w[ 4] = h_sha1[4];
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha1_update (&sha1_ctx, w, 20);
md5_final (&md5_ctx);
sha1_final (&sha1_ctx);
// md5
w[ 0] = md5_ctx.h[0];
w[ 1] = md5_ctx.h[1];
w[ 2] = md5_ctx.h[2];
w[ 3] = md5_ctx.h[3];
w[ 4] = 0;
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
md5_hmac_ctx_t md5_hmac_ctx;
md5_hmac_init (&md5_hmac_ctx, w, 16);
md5_hmac_update_global (&md5_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
md5_hmac_final (&md5_hmac_ctx);
{
const u32 r0 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R0]);
const u32 r1 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R1]);
const u32 r2 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R2]);
const u32 r3 = 0;
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}
// sha1
w[ 0] = sha1_ctx.h[0];
w[ 1] = sha1_ctx.h[1];
w[ 2] = sha1_ctx.h[2];
w[ 3] = sha1_ctx.h[3];
w[ 4] = sha1_ctx.h[4];
w[ 5] = 0;
w[ 6] = 0;
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha1_hmac_ctx_t sha1_hmac_ctx;
sha1_hmac_init (&sha1_hmac_ctx, w, 20);
sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
sha1_hmac_final (&sha1_hmac_ctx);
{
const u32 r0 = sha1_hmac_ctx.opad.h[DGST_R0];
const u32 r1 = sha1_hmac_ctx.opad.h[DGST_R1];
const u32 r2 = sha1_hmac_ctx.opad.h[DGST_R2];
const u32 r3 = 0;
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}
}

@ -18,8 +18,8 @@
#define COMPARE_M "inc_comp_multi.cl"
#define SNMPV3_SALT_MAX 1500
#define SNMPV3_ENGINEID_MAX 32
#define SNMPV3_MSG_AUTH_PARAMS_MAX 12
#define SNMPV3_ENGINEID_MAX 34
#define SNMPV3_MSG_AUTH_PARAMS_LEN 12
#define SNMPV3_ROUNDS 1048576
#define SNMPV3_MAX_PW_LENGTH 64

@ -18,13 +18,17 @@
#define COMPARE_M "inc_comp_multi.cl"
#define SNMPV3_SALT_MAX 1500
#define SNMPV3_ENGINEID_MAX 32
#define SNMPV3_MSG_AUTH_PARAMS_MAX 12
#define SNMPV3_ENGINEID_MAX 34
#define SNMPV3_MSG_AUTH_PARAMS_LEN 12
#define SNMPV3_ROUNDS 1048576
#define SNMPV3_MAX_PW_LENGTH 64
#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
#define SNMPV3_HASH_ELEMS 8 // 8 = aligned 5
#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
#define SNMPV3_HASH_ELEMS 8 // 8 = aligned 5
#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9
typedef struct hmac_sha1_tmp
{
@ -33,10 +37,6 @@ typedef struct hmac_sha1_tmp
} hmac_sha1_tmp_t;
#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9
typedef struct snmpv3
{
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];

@ -0,0 +1,369 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha224.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#define SNMPV3_SALT_MAX 1500
#define SNMPV3_ENGINEID_MAX 34
#define SNMPV3_MSG_AUTH_PARAMS_MAX 16
#define SNMPV3_ROUNDS 1048576
#define SNMPV3_MAX_PW_LENGTH 64
#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
#define SNMPV3_HASH_ELEMS 8
#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9
typedef struct hmac_sha224_tmp
{
u32 tmp[SNMPV3_TMP_ELEMS];
u32 h[SNMPV3_HASH_ELEMS];
} hmac_sha224_tmp_t;
typedef struct snmpv3
{
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
u32 salt_len;
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
u32 engineID_len;
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
} snmpv3_t;
KERNEL_FQ void m26700_init (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32 w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
u8 *src_ptr = (u8 *) w;
// password 64 times, also swapped
u32 dst_buf[16];
u8 *dst_ptr = (u8 *) dst_buf;
int tmp_idx = 0;
for (int i = 0; i < 64; i++)
{
for (int j = 0; j < pw_len; j++)
{
const int dst_idx = tmp_idx & 63;
dst_ptr[dst_idx] = src_ptr[j];
// write to global memory every time 64 byte are written into cache
if (dst_idx == 63)
{
const int tmp_idx4 = (tmp_idx - 63) / 4;
tmps[gid].tmp[tmp_idx4 + 0] = hc_swap32_S (dst_buf[ 0]);
tmps[gid].tmp[tmp_idx4 + 1] = hc_swap32_S (dst_buf[ 1]);
tmps[gid].tmp[tmp_idx4 + 2] = hc_swap32_S (dst_buf[ 2]);
tmps[gid].tmp[tmp_idx4 + 3] = hc_swap32_S (dst_buf[ 3]);
tmps[gid].tmp[tmp_idx4 + 4] = hc_swap32_S (dst_buf[ 4]);
tmps[gid].tmp[tmp_idx4 + 5] = hc_swap32_S (dst_buf[ 5]);
tmps[gid].tmp[tmp_idx4 + 6] = hc_swap32_S (dst_buf[ 6]);
tmps[gid].tmp[tmp_idx4 + 7] = hc_swap32_S (dst_buf[ 7]);
tmps[gid].tmp[tmp_idx4 + 8] = hc_swap32_S (dst_buf[ 8]);
tmps[gid].tmp[tmp_idx4 + 9] = hc_swap32_S (dst_buf[ 9]);
tmps[gid].tmp[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
tmps[gid].tmp[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
tmps[gid].tmp[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
tmps[gid].tmp[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
tmps[gid].tmp[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
tmps[gid].tmp[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
}
tmp_idx++;
}
}
// hash
tmps[gid].h[0] = SHA224M_A;
tmps[gid].h[1] = SHA224M_B;
tmps[gid].h[2] = SHA224M_C;
tmps[gid].h[3] = SHA224M_D;
tmps[gid].h[4] = SHA224M_E;
tmps[gid].h[5] = SHA224M_F;
tmps[gid].h[6] = SHA224M_G;
tmps[gid].h[7] = SHA224M_H;
}
KERNEL_FQ void m26700_loop (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 h[8];
h[0] = tmps[gid].h[0];
h[1] = tmps[gid].h[1];
h[2] = tmps[gid].h[2];
h[3] = tmps[gid].h[3];
h[4] = tmps[gid].h[4];
h[5] = tmps[gid].h[5];
h[6] = tmps[gid].h[6];
h[7] = tmps[gid].h[7];
const u32 pw_len = pws[gid].pw_len;
const int pw_len64 = pw_len * 64;
#define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
// for pw length > 64 we use global memory reads
if (pw_len < 64)
{
u32 tmp[SNMPV3_TMP_ELEMS_OPT];
for (int i = 0; i < pw_len64 / 4; i++)
{
tmp[i] = tmps[gid].tmp[i];
}
for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
{
const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = tmp[idx + 0];
w0[1] = tmp[idx + 1];
w0[2] = tmp[idx + 2];
w0[3] = tmp[idx + 3];
w1[0] = tmp[idx + 4];
w1[1] = tmp[idx + 5];
w1[2] = tmp[idx + 6];
w1[3] = tmp[idx + 7];
w2[0] = tmp[idx + 8];
w2[1] = tmp[idx + 9];
w2[2] = tmp[idx + 10];
w2[3] = tmp[idx + 11];
w3[0] = tmp[idx + 12];
w3[1] = tmp[idx + 13];
w3[2] = tmp[idx + 14];
w3[3] = tmp[idx + 15];
sha224_transform (w0, w1, w2, w3, h);
}
}
else
{
for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
{
const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = tmps[gid].tmp[idx + 0];
w0[1] = tmps[gid].tmp[idx + 1];
w0[2] = tmps[gid].tmp[idx + 2];
w0[3] = tmps[gid].tmp[idx + 3];
w1[0] = tmps[gid].tmp[idx + 4];
w1[1] = tmps[gid].tmp[idx + 5];
w1[2] = tmps[gid].tmp[idx + 6];
w1[3] = tmps[gid].tmp[idx + 7];
w2[0] = tmps[gid].tmp[idx + 8];
w2[1] = tmps[gid].tmp[idx + 9];
w2[2] = tmps[gid].tmp[idx + 10];
w2[3] = tmps[gid].tmp[idx + 11];
w3[0] = tmps[gid].tmp[idx + 12];
w3[1] = tmps[gid].tmp[idx + 13];
w3[2] = tmps[gid].tmp[idx + 14];
w3[3] = tmps[gid].tmp[idx + 15];
sha224_transform (w0, w1, w2, w3, h);
}
}
tmps[gid].h[0] = h[0];
tmps[gid].h[1] = h[1];
tmps[gid].h[2] = h[2];
tmps[gid].h[3] = h[3];
tmps[gid].h[4] = h[4];
tmps[gid].h[5] = h[5];
tmps[gid].h[6] = h[6];
tmps[gid].h[7] = h[7];
}
KERNEL_FQ void m26700_comp (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = 0x80000000;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 1048576 * 8;
u32 h[8];
h[0] = tmps[gid].h[0];
h[1] = tmps[gid].h[1];
h[2] = tmps[gid].h[2];
h[3] = tmps[gid].h[3];
h[4] = tmps[gid].h[4];
h[5] = tmps[gid].h[5];
h[6] = tmps[gid].h[6];
h[7] = tmps[gid].h[7];
sha224_transform (w0, w1, w2, w3, h);
sha224_ctx_t ctx;
sha224_init (&ctx);
u32 w[16];
w[ 0] = h[0];
w[ 1] = h[1];
w[ 2] = h[2];
w[ 3] = h[3];
w[ 4] = h[4];
w[ 5] = h[5];
w[ 6] = h[6];
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha224_update (&ctx, w, 28);
sha224_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
w[ 0] = h[0];
w[ 1] = h[1];
w[ 2] = h[2];
w[ 3] = h[3];
w[ 4] = h[4];
w[ 5] = h[5];
w[ 6] = h[6];
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha224_update (&ctx, w, 28);
sha224_final (&ctx);
w[ 0] = ctx.h[0];
w[ 1] = ctx.h[1];
w[ 2] = ctx.h[2];
w[ 3] = ctx.h[3];
w[ 4] = ctx.h[4];
w[ 5] = ctx.h[5];
w[ 6] = ctx.h[6];
w[ 7] = 0;
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha224_hmac_ctx_t hmac_ctx;
sha224_hmac_init (&hmac_ctx, w, 28);
sha224_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
sha224_hmac_final (&hmac_ctx);
const u32 r0 = hmac_ctx.opad.h[DGST_R0];
const u32 r1 = hmac_ctx.opad.h[DGST_R1];
const u32 r2 = hmac_ctx.opad.h[DGST_R2];
const u32 r3 = hmac_ctx.opad.h[DGST_R3];
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}

@ -0,0 +1,369 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha256.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#define SNMPV3_SALT_MAX 1500
#define SNMPV3_ENGINEID_MAX 34
#define SNMPV3_MSG_AUTH_PARAMS_MAX 24
#define SNMPV3_ROUNDS 1048576
#define SNMPV3_MAX_PW_LENGTH 64
#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
#define SNMPV3_HASH_ELEMS 8
#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64
#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9
typedef struct hmac_sha256_tmp
{
u32 tmp[SNMPV3_TMP_ELEMS];
u32 h[SNMPV3_HASH_ELEMS];
} hmac_sha256_tmp_t;
typedef struct snmpv3
{
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
u32 salt_len;
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
u32 engineID_len;
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
} snmpv3_t;
KERNEL_FQ void m26800_init (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32 w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
u8 *src_ptr = (u8 *) w;
// password 64 times, also swapped
u32 dst_buf[16];
u8 *dst_ptr = (u8 *) dst_buf;
int tmp_idx = 0;
for (int i = 0; i < 64; i++)
{
for (int j = 0; j < pw_len; j++)
{
const int dst_idx = tmp_idx & 63;
dst_ptr[dst_idx] = src_ptr[j];
// write to global memory every time 64 byte are written into cache
if (dst_idx == 63)
{
const int tmp_idx4 = (tmp_idx - 63) / 4;
tmps[gid].tmp[tmp_idx4 + 0] = hc_swap32_S (dst_buf[ 0]);
tmps[gid].tmp[tmp_idx4 + 1] = hc_swap32_S (dst_buf[ 1]);
tmps[gid].tmp[tmp_idx4 + 2] = hc_swap32_S (dst_buf[ 2]);
tmps[gid].tmp[tmp_idx4 + 3] = hc_swap32_S (dst_buf[ 3]);
tmps[gid].tmp[tmp_idx4 + 4] = hc_swap32_S (dst_buf[ 4]);
tmps[gid].tmp[tmp_idx4 + 5] = hc_swap32_S (dst_buf[ 5]);
tmps[gid].tmp[tmp_idx4 + 6] = hc_swap32_S (dst_buf[ 6]);
tmps[gid].tmp[tmp_idx4 + 7] = hc_swap32_S (dst_buf[ 7]);
tmps[gid].tmp[tmp_idx4 + 8] = hc_swap32_S (dst_buf[ 8]);
tmps[gid].tmp[tmp_idx4 + 9] = hc_swap32_S (dst_buf[ 9]);
tmps[gid].tmp[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
tmps[gid].tmp[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
tmps[gid].tmp[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
tmps[gid].tmp[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
tmps[gid].tmp[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
tmps[gid].tmp[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
}
tmp_idx++;
}
}
// hash
tmps[gid].h[0] = SHA256M_A;
tmps[gid].h[1] = SHA256M_B;
tmps[gid].h[2] = SHA256M_C;
tmps[gid].h[3] = SHA256M_D;
tmps[gid].h[4] = SHA256M_E;
tmps[gid].h[5] = SHA256M_F;
tmps[gid].h[6] = SHA256M_G;
tmps[gid].h[7] = SHA256M_H;
}
KERNEL_FQ void m26800_loop (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 h[8];
h[0] = tmps[gid].h[0];
h[1] = tmps[gid].h[1];
h[2] = tmps[gid].h[2];
h[3] = tmps[gid].h[3];
h[4] = tmps[gid].h[4];
h[5] = tmps[gid].h[5];
h[6] = tmps[gid].h[6];
h[7] = tmps[gid].h[7];
const u32 pw_len = pws[gid].pw_len;
const int pw_len64 = pw_len * 64;
#define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
// for pw length > 64 we use global memory reads
if (pw_len < 64)
{
u32 tmp[SNMPV3_TMP_ELEMS_OPT];
for (int i = 0; i < pw_len64 / 4; i++)
{
tmp[i] = tmps[gid].tmp[i];
}
for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
{
const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = tmp[idx + 0];
w0[1] = tmp[idx + 1];
w0[2] = tmp[idx + 2];
w0[3] = tmp[idx + 3];
w1[0] = tmp[idx + 4];
w1[1] = tmp[idx + 5];
w1[2] = tmp[idx + 6];
w1[3] = tmp[idx + 7];
w2[0] = tmp[idx + 8];
w2[1] = tmp[idx + 9];
w2[2] = tmp[idx + 10];
w2[3] = tmp[idx + 11];
w3[0] = tmp[idx + 12];
w3[1] = tmp[idx + 13];
w3[2] = tmp[idx + 14];
w3[3] = tmp[idx + 15];
sha256_transform (w0, w1, w2, w3, h);
}
}
else
{
for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
{
const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = tmps[gid].tmp[idx + 0];
w0[1] = tmps[gid].tmp[idx + 1];
w0[2] = tmps[gid].tmp[idx + 2];
w0[3] = tmps[gid].tmp[idx + 3];
w1[0] = tmps[gid].tmp[idx + 4];
w1[1] = tmps[gid].tmp[idx + 5];
w1[2] = tmps[gid].tmp[idx + 6];
w1[3] = tmps[gid].tmp[idx + 7];
w2[0] = tmps[gid].tmp[idx + 8];
w2[1] = tmps[gid].tmp[idx + 9];
w2[2] = tmps[gid].tmp[idx + 10];
w2[3] = tmps[gid].tmp[idx + 11];
w3[0] = tmps[gid].tmp[idx + 12];
w3[1] = tmps[gid].tmp[idx + 13];
w3[2] = tmps[gid].tmp[idx + 14];
w3[3] = tmps[gid].tmp[idx + 15];
sha256_transform (w0, w1, w2, w3, h);
}
}
tmps[gid].h[0] = h[0];
tmps[gid].h[1] = h[1];
tmps[gid].h[2] = h[2];
tmps[gid].h[3] = h[3];
tmps[gid].h[4] = h[4];
tmps[gid].h[5] = h[5];
tmps[gid].h[6] = h[6];
tmps[gid].h[7] = h[7];
}
KERNEL_FQ void m26800_comp (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = 0x80000000;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 1048576 * 8;
u32 h[8];
h[0] = tmps[gid].h[0];
h[1] = tmps[gid].h[1];
h[2] = tmps[gid].h[2];
h[3] = tmps[gid].h[3];
h[4] = tmps[gid].h[4];
h[5] = tmps[gid].h[5];
h[6] = tmps[gid].h[6];
h[7] = tmps[gid].h[7];
sha256_transform (w0, w1, w2, w3, h);
sha256_ctx_t ctx;
sha256_init (&ctx);
u32 w[16];
w[ 0] = h[0];
w[ 1] = h[1];
w[ 2] = h[2];
w[ 3] = h[3];
w[ 4] = h[4];
w[ 5] = h[5];
w[ 6] = h[6];
w[ 7] = h[7];
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha256_update (&ctx, w, 32);
sha256_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
w[ 0] = h[0];
w[ 1] = h[1];
w[ 2] = h[2];
w[ 3] = h[3];
w[ 4] = h[4];
w[ 5] = h[5];
w[ 6] = h[6];
w[ 7] = h[7];
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha256_update (&ctx, w, 32);
sha256_final (&ctx);
w[ 0] = ctx.h[0];
w[ 1] = ctx.h[1];
w[ 2] = ctx.h[2];
w[ 3] = ctx.h[3];
w[ 4] = ctx.h[4];
w[ 5] = ctx.h[5];
w[ 6] = ctx.h[6];
w[ 7] = ctx.h[7];
w[ 8] = 0;
w[ 9] = 0;
w[10] = 0;
w[11] = 0;
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
sha256_hmac_ctx_t hmac_ctx;
sha256_hmac_init (&hmac_ctx, w, 32);
sha256_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
sha256_hmac_final (&hmac_ctx);
const u32 r0 = hmac_ctx.opad.h[DGST_R0];
const u32 r1 = hmac_ctx.opad.h[DGST_R1];
const u32 r2 = hmac_ctx.opad.h[DGST_R2];
const u32 r3 = hmac_ctx.opad.h[DGST_R3];
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}

@ -0,0 +1,495 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_sha384.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#define SNMPV3_SALT_MAX 1500
#define SNMPV3_ENGINEID_MAX 34
#define SNMPV3_MSG_AUTH_PARAMS_MAX 32
#define SNMPV3_ROUNDS 1048576
#define SNMPV3_MAX_PW_LENGTH 128
#define SNMPV3_TMP_ELEMS 8192 // 8192 = (256 (max pw length) * SNMPV3_MAX_PW_LENGTH) / sizeof (u32)
#define SNMPV3_HASH_ELEMS 8
#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of SNMPV3_MAX_PW_LENGTH
#define SNMPV3_MAX_ENGINE_ELEMS 32 // 32 * 4 = 128 > 34, also has to be multiple of SNMPV3_MAX_PW_LENGTH
#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9
#define SNMPV3_MAX_PW_LENGTH_OPT 32
#define SNMPV3_TMP_ELEMS_OPT ((SNMPV3_MAX_PW_LENGTH_OPT * SNMPV3_MAX_PW_LENGTH) / 4)
// (32 * 128) / 4 = 1024
// for pw length > 32 we use global memory reads
typedef struct hmac_sha384_tmp
{
u32 tmp[SNMPV3_TMP_ELEMS];
u64 h[SNMPV3_HASH_ELEMS];
} hmac_sha384_tmp_t;
typedef struct snmpv3
{
u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
u32 salt_len;
u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
u32 engineID_len;
u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
} snmpv3_t;
KERNEL_FQ void m26900_init (KERN_ATTR_TMPS_ESALT (hmac_sha384_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32 w[128] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
u8 *src_ptr = (u8 *) w;
// password 128 times, also swapped
u32 dst_buf[32];
u8 *dst_ptr = (u8 *) dst_buf;
int tmp_idx = 0;
for (int i = 0; i < 128; i++)
{
for (u32 j = 0; j < pw_len; j++)
{
const int dst_idx = tmp_idx & 127;
dst_ptr[dst_idx] = src_ptr[j];
// write to global memory every time 64 byte are written into cache
if (dst_idx == 127)
{
const int tmp_idx4 = (tmp_idx - 127) / 4;
tmps[gid].tmp[tmp_idx4 + 0] = hc_swap32_S (dst_buf[ 0]);
tmps[gid].tmp[tmp_idx4 + 1] = hc_swap32_S (dst_buf[ 1]);
tmps[gid].tmp[tmp_idx4 + 2] = hc_swap32_S (dst_buf[ 2]);
tmps[gid].tmp[tmp_idx4 + 3] = hc_swap32_S (dst_buf[ 3]);
tmps[gid].tmp[tmp_idx4 + 4] = hc_swap32_S (dst_buf[ 4]);
tmps[gid].tmp[tmp_idx4 + 5] = hc_swap32_S (dst_buf[ 5]);
tmps[gid].tmp[tmp_idx4 + 6] = hc_swap32_S (dst_buf[ 6]);
tmps[gid].tmp[tmp_idx4 + 7] = hc_swap32_S (dst_buf[ 7]);
tmps[gid].tmp[tmp_idx4 + 8] = hc_swap32_S (dst_buf[ 8]);
tmps[gid].tmp[tmp_idx4 + 9] = hc_swap32_S (dst_buf[ 9]);
tmps[gid].tmp[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
tmps[gid].tmp[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
tmps[gid].tmp[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
tmps[gid].tmp[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
tmps[gid].tmp[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
tmps[gid].tmp[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
tmps[gid].tmp[tmp_idx4 + 16] = hc_swap32_S (dst_buf[16]);
tmps[gid].tmp[tmp_idx4 + 17] = hc_swap32_S (dst_buf[17]);
tmps[gid].tmp[tmp_idx4 + 18] = hc_swap32_S (dst_buf[18]);
tmps[gid].tmp[tmp_idx4 + 19] = hc_swap32_S (dst_buf[19]);
tmps[gid].tmp[tmp_idx4 + 20] = hc_swap32_S (dst_buf[20]);
tmps[gid].tmp[tmp_idx4 + 21] = hc_swap32_S (dst_buf[21]);
tmps[gid].tmp[tmp_idx4 + 22] = hc_swap32_S (dst_buf[22]);
tmps[gid].tmp[tmp_idx4 + 23] = hc_swap32_S (dst_buf[23]);
tmps[gid].tmp[tmp_idx4 + 24] = hc_swap32_S (dst_buf[24]);
tmps[gid].tmp[tmp_idx4 + 25] = hc_swap32_S (dst_buf[25]);
tmps[gid].tmp[tmp_idx4 + 26] = hc_swap32_S (dst_buf[26]);
tmps[gid].tmp[tmp_idx4 + 27] = hc_swap32_S (dst_buf[27]);
tmps[gid].tmp[tmp_idx4 + 28] = hc_swap32_S (dst_buf[28]);
tmps[gid].tmp[tmp_idx4 + 29] = hc_swap32_S (dst_buf[29]);
tmps[gid].tmp[tmp_idx4 + 30] = hc_swap32_S (dst_buf[30]);
tmps[gid].tmp[tmp_idx4 + 31] = hc_swap32_S (dst_buf[31]);
}
tmp_idx++;
}
}
// hash
tmps[gid].h[0] = SHA384M_A;
tmps[gid].h[1] = SHA384M_B;
tmps[gid].h[2] = SHA384M_C;
tmps[gid].h[3] = SHA384M_D;
tmps[gid].h[4] = SHA384M_E;
tmps[gid].h[5] = SHA384M_F;
tmps[gid].h[6] = SHA384M_G;
tmps[gid].h[7] = SHA384M_H;
}
KERNEL_FQ void m26900_loop (KERN_ATTR_TMPS_ESALT (hmac_sha384_tmp_t, snmpv3_t))
{
/**
* base
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u64 h[8];
h[0] = tmps[gid].h[0];
h[1] = tmps[gid].h[1];
h[2] = tmps[gid].h[2];
h[3] = tmps[gid].h[3];
h[4] = tmps[gid].h[4];
h[5] = tmps[gid].h[5];
h[6] = tmps[gid].h[6];
h[7] = tmps[gid].h[7];
const u32 pw_len = pws[gid].pw_len;
const int pw_len128 = pw_len * 128;
if (pw_len <= SNMPV3_MAX_PW_LENGTH_OPT)
{
u32 tmp[SNMPV3_TMP_ELEMS_OPT];
for (int i = 0; i < pw_len128 / 4; i++)
{
tmp[i] = tmps[gid].tmp[i];
}
for (u32 i = 0, j = loop_pos; i < loop_cnt; i += 128, j += 128)
{
const int idx = (j % pw_len128) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
u32 w4[4];
u32 w5[4];
u32 w6[4];
u32 w7[4];
w0[0] = tmp[idx + 0];
w0[1] = tmp[idx + 1];
w0[2] = tmp[idx + 2];
w0[3] = tmp[idx + 3];
w1[0] = tmp[idx + 4];
w1[1] = tmp[idx + 5];
w1[2] = tmp[idx + 6];
w1[3] = tmp[idx + 7];
w2[0] = tmp[idx + 8];
w2[1] = tmp[idx + 9];
w2[2] = tmp[idx + 10];
w2[3] = tmp[idx + 11];
w3[0] = tmp[idx + 12];
w3[1] = tmp[idx + 13];
w3[2] = tmp[idx + 14];
w3[3] = tmp[idx + 15];
w4[0] = tmp[idx + 16];
w4[1] = tmp[idx + 17];
w4[2] = tmp[idx + 18];
w4[3] = tmp[idx + 19];
w5[0] = tmp[idx + 20];
w5[1] = tmp[idx + 21];
w5[2] = tmp[idx + 22];
w5[3] = tmp[idx + 23];
w6[0] = tmp[idx + 24];
w6[1] = tmp[idx + 25];
w6[2] = tmp[idx + 26];
w6[3] = tmp[idx + 27];
w7[0] = tmp[idx + 28];
w7[1] = tmp[idx + 29];
w7[2] = tmp[idx + 30];
w7[3] = tmp[idx + 31];
sha384_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);
}
}
else
{
for (u32 i = 0, j = loop_pos; i < loop_cnt; i += 128, j += 128)
{
const int idx = (j % pw_len128) / 4; // the optimization trick is to be able to do this
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
u32 w4[4];
u32 w5[4];
u32 w6[4];
u32 w7[4];
w0[0] = tmps[gid].tmp[idx + 0];
w0[1] = tmps[gid].tmp[idx + 1];
w0[2] = tmps[gid].tmp[idx + 2];
w0[3] = tmps[gid].tmp[idx + 3];
w1[0] = tmps[gid].tmp[idx + 4];
w1[1] = tmps[gid].tmp[idx + 5];
w1[2] = tmps[gid].tmp[idx + 6];
w1[3] = tmps[gid].tmp[idx + 7];
w2[0] = tmps[gid].tmp[idx + 8];
w2[1] = tmps[gid].tmp[idx + 9];
w2[2] = tmps[gid].tmp[idx + 10];
w2[3] = tmps[gid].tmp[idx + 11];
w3[0] = tmps[gid].tmp[idx + 12];
w3[1] = tmps[gid].tmp[idx + 13];
w3[2] = tmps[gid].tmp[idx + 14];
w3[3] = tmps[gid].tmp[idx + 15];
w4[0] = tmps[gid].tmp[idx + 16];
w4[1] = tmps[gid].tmp[idx + 17];
w4[2] = tmps[gid].tmp[idx + 18];
w4[3] = tmps[gid].tmp[idx + 19];
w5[0] = tmps[gid].tmp[idx + 20];
w5[1] = tmps[gid].tmp[idx + 21];
w5[2] = tmps[gid].tmp[idx + 22];
w5[3] = tmps[gid].tmp[idx + 23];
w6[0] = tmps[gid].tmp[idx + 24];
w6[1] = tmps[gid].tmp[idx + 25];
w6[2] = tmps[gid].tmp[idx + 26];
w6[3] = tmps[gid].tmp[idx + 27];
w7[0] = tmps[gid].tmp[idx + 28];
w7[1] = tmps[gid].tmp[idx + 29];
w7[2] = tmps[gid].tmp[idx + 30];
w7[3] = tmps[gid].tmp[idx + 31];
sha384_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);
}
}
tmps[gid].h[0] = h[0];
tmps[gid].h[1] = h[1];
tmps[gid].h[2] = h[2];
tmps[gid].h[3] = h[3];
tmps[gid].h[4] = h[4];
tmps[gid].h[5] = h[5];
tmps[gid].h[6] = h[6];
tmps[gid].h[7] = h[7];
}
KERNEL_FQ void m26900_comp (KERN_ATTR_TMPS_ESALT (hmac_sha384_tmp_t, snmpv3_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
u32 w4[4];
u32 w5[4];
u32 w6[4];
u32 w7[4];
w0[0] = 0x80000000;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
w4[0] = 0;
w4[1] = 0;
w4[2] = 0;
w4[3] = 0;
w5[0] = 0;
w5[1] = 0;
w5[2] = 0;
w5[3] = 0;
w6[0] = 0;
w6[1] = 0;
w6[2] = 0;
w6[3] = 0;
w7[0] = 0;
w7[1] = 0;
w7[2] = 0;
w7[3] = 1048576 * 8;
u64 h[8];
h[0] = tmps[gid].h[0];
h[1] = tmps[gid].h[1];
h[2] = tmps[gid].h[2];
h[3] = tmps[gid].h[3];
h[4] = tmps[gid].h[4];
h[5] = tmps[gid].h[5];
h[6] = tmps[gid].h[6];
h[7] = tmps[gid].h[7];
sha384_transform (w0, w1, w2, w3, w4, w5, w6, w7, h);
sha384_ctx_t ctx;
sha384_init (&ctx);
u32 w[32];
w[ 0] = h32_from_64_S (h[0]);
w[ 1] = l32_from_64_S (h[0]);
w[ 2] = h32_from_64_S (h[1]);
w[ 3] = l32_from_64_S (h[1]);
w[ 4] = h32_from_64_S (h[2]);
w[ 5] = l32_from_64_S (h[2]);
w[ 6] = h32_from_64_S (h[3]);
w[ 7] = l32_from_64_S (h[3]);
w[ 8] = h32_from_64_S (h[4]);
w[ 9] = l32_from_64_S (h[4]);
w[10] = h32_from_64_S (h[5]);
w[11] = l32_from_64_S (h[5]);
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
w[16] = 0;
w[17] = 0;
w[18] = 0;
w[19] = 0;
w[20] = 0;
w[21] = 0;
w[22] = 0;
w[23] = 0;
w[24] = 0;
w[25] = 0;
w[26] = 0;
w[27] = 0;
w[28] = 0;
w[29] = 0;
w[30] = 0;
w[31] = 0;
sha384_update (&ctx, w, 48);
sha384_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
w[ 0] = h32_from_64_S (h[0]);
w[ 1] = l32_from_64_S (h[0]);
w[ 2] = h32_from_64_S (h[1]);
w[ 3] = l32_from_64_S (h[1]);
w[ 4] = h32_from_64_S (h[2]);
w[ 5] = l32_from_64_S (h[2]);
w[ 6] = h32_from_64_S (h[3]);
w[ 7] = l32_from_64_S (h[3]);
w[ 8] = h32_from_64_S (h[4]);
w[ 9] = l32_from_64_S (h[4]);
w[10] = h32_from_64_S (h[5]);
w[11] = l32_from_64_S (h[5]);
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
w[16] = 0;
w[17] = 0;
w[18] = 0;
w[19] = 0;
w[20] = 0;
w[21] = 0;
w[22] = 0;
w[23] = 0;
w[24] = 0;
w[25] = 0;
w[26] = 0;
w[27] = 0;
w[28] = 0;
w[29] = 0;
w[30] = 0;
w[31] = 0;
sha384_update (&ctx, w, 48);
sha384_final (&ctx);
w[ 0] = h32_from_64_S (ctx.h[0]);
w[ 1] = l32_from_64_S (ctx.h[0]);
w[ 2] = h32_from_64_S (ctx.h[1]);
w[ 3] = l32_from_64_S (ctx.h[1]);
w[ 4] = h32_from_64_S (ctx.h[2]);
w[ 5] = l32_from_64_S (ctx.h[2]);
w[ 6] = h32_from_64_S (ctx.h[3]);
w[ 7] = l32_from_64_S (ctx.h[3]);
w[ 8] = h32_from_64_S (ctx.h[4]);
w[ 9] = l32_from_64_S (ctx.h[4]);
w[10] = h32_from_64_S (ctx.h[5]);
w[11] = l32_from_64_S (ctx.h[5]);
w[12] = 0;
w[13] = 0;
w[14] = 0;
w[15] = 0;
w[16] = 0;
w[17] = 0;
w[18] = 0;
w[19] = 0;
w[20] = 0;
w[21] = 0;
w[22] = 0;
w[23] = 0;
w[24] = 0;
w[25] = 0;
w[26] = 0;
w[27] = 0;
w[28] = 0;
w[29] = 0;
w[30] = 0;
w[31] = 0;
sha384_hmac_ctx_t hmac_ctx;
sha384_hmac_init (&hmac_ctx, w, 48);
sha384_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
sha384_hmac_final (&hmac_ctx);
const u32 r0 = l32_from_64 (hmac_ctx.opad.h[1]);
const u32 r1 = h32_from_64 (hmac_ctx.opad.h[1]);
const u32 r2 = l32_from_64 (hmac_ctx.opad.h[0]);
const u32 r3 = h32_from_64 (hmac_ctx.opad.h[0]);
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}

@ -0,0 +1,697 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md4.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#define PERM_OP(a,b,tt,n,m) \
{ \
tt = a >> n; \
tt = tt ^ b; \
tt = tt & m; \
b = b ^ tt; \
tt = tt << n; \
a = a ^ tt; \
}
#define HPERM_OP(a,tt,n,m) \
{ \
tt = a << (16 + n); \
tt = tt ^ a; \
tt = tt & m; \
a = a ^ tt; \
tt = tt >> (16 + n); \
a = a ^ tt; \
}
CONSTANT_VK u32a c_SPtrans[8][64] =
{
{
0x02080800, 0x00080000, 0x02000002, 0x02080802,
0x02000000, 0x00080802, 0x00080002, 0x02000002,
0x00080802, 0x02080800, 0x02080000, 0x00000802,
0x02000802, 0x02000000, 0x00000000, 0x00080002,
0x00080000, 0x00000002, 0x02000800, 0x00080800,
0x02080802, 0x02080000, 0x00000802, 0x02000800,
0x00000002, 0x00000800, 0x00080800, 0x02080002,
0x00000800, 0x02000802, 0x02080002, 0x00000000,
0x00000000, 0x02080802, 0x02000800, 0x00080002,
0x02080800, 0x00080000, 0x00000802, 0x02000800,
0x02080002, 0x00000800, 0x00080800, 0x02000002,
0x00080802, 0x00000002, 0x02000002, 0x02080000,
0x02080802, 0x00080800, 0x02080000, 0x02000802,
0x02000000, 0x00000802, 0x00080002, 0x00000000,
0x00080000, 0x02000000, 0x02000802, 0x02080800,
0x00000002, 0x02080002, 0x00000800, 0x00080802,
},
{
0x40108010, 0x00000000, 0x00108000, 0x40100000,
0x40000010, 0x00008010, 0x40008000, 0x00108000,
0x00008000, 0x40100010, 0x00000010, 0x40008000,
0x00100010, 0x40108000, 0x40100000, 0x00000010,
0x00100000, 0x40008010, 0x40100010, 0x00008000,
0x00108010, 0x40000000, 0x00000000, 0x00100010,
0x40008010, 0x00108010, 0x40108000, 0x40000010,
0x40000000, 0x00100000, 0x00008010, 0x40108010,
0x00100010, 0x40108000, 0x40008000, 0x00108010,
0x40108010, 0x00100010, 0x40000010, 0x00000000,
0x40000000, 0x00008010, 0x00100000, 0x40100010,
0x00008000, 0x40000000, 0x00108010, 0x40008010,
0x40108000, 0x00008000, 0x00000000, 0x40000010,
0x00000010, 0x40108010, 0x00108000, 0x40100000,
0x40100010, 0x00100000, 0x00008010, 0x40008000,
0x40008010, 0x00000010, 0x40100000, 0x00108000,
},
{
0x04000001, 0x04040100, 0x00000100, 0x04000101,
0x00040001, 0x04000000, 0x04000101, 0x00040100,
0x04000100, 0x00040000, 0x04040000, 0x00000001,
0x04040101, 0x00000101, 0x00000001, 0x04040001,
0x00000000, 0x00040001, 0x04040100, 0x00000100,
0x00000101, 0x04040101, 0x00040000, 0x04000001,
0x04040001, 0x04000100, 0x00040101, 0x04040000,
0x00040100, 0x00000000, 0x04000000, 0x00040101,
0x04040100, 0x00000100, 0x00000001, 0x00040000,
0x00000101, 0x00040001, 0x04040000, 0x04000101,
0x00000000, 0x04040100, 0x00040100, 0x04040001,
0x00040001, 0x04000000, 0x04040101, 0x00000001,
0x00040101, 0x04000001, 0x04000000, 0x04040101,
0x00040000, 0x04000100, 0x04000101, 0x00040100,
0x04000100, 0x00000000, 0x04040001, 0x00000101,
0x04000001, 0x00040101, 0x00000100, 0x04040000,
},
{
0x00401008, 0x10001000, 0x00000008, 0x10401008,
0x00000000, 0x10400000, 0x10001008, 0x00400008,
0x10401000, 0x10000008, 0x10000000, 0x00001008,
0x10000008, 0x00401008, 0x00400000, 0x10000000,
0x10400008, 0x00401000, 0x00001000, 0x00000008,
0x00401000, 0x10001008, 0x10400000, 0x00001000,
0x00001008, 0x00000000, 0x00400008, 0x10401000,
0x10001000, 0x10400008, 0x10401008, 0x00400000,
0x10400008, 0x00001008, 0x00400000, 0x10000008,
0x00401000, 0x10001000, 0x00000008, 0x10400000,
0x10001008, 0x00000000, 0x00001000, 0x00400008,
0x00000000, 0x10400008, 0x10401000, 0x00001000,
0x10000000, 0x10401008, 0x00401008, 0x00400000,
0x10401008, 0x00000008, 0x10001000, 0x00401008,
0x00400008, 0x00401000, 0x10400000, 0x10001008,
0x00001008, 0x10000000, 0x10000008, 0x10401000,
},
{
0x08000000, 0x00010000, 0x00000400, 0x08010420,
0x08010020, 0x08000400, 0x00010420, 0x08010000,
0x00010000, 0x00000020, 0x08000020, 0x00010400,
0x08000420, 0x08010020, 0x08010400, 0x00000000,
0x00010400, 0x08000000, 0x00010020, 0x00000420,
0x08000400, 0x00010420, 0x00000000, 0x08000020,
0x00000020, 0x08000420, 0x08010420, 0x00010020,
0x08010000, 0x00000400, 0x00000420, 0x08010400,
0x08010400, 0x08000420, 0x00010020, 0x08010000,
0x00010000, 0x00000020, 0x08000020, 0x08000400,
0x08000000, 0x00010400, 0x08010420, 0x00000000,
0x00010420, 0x08000000, 0x00000400, 0x00010020,
0x08000420, 0x00000400, 0x00000000, 0x08010420,
0x08010020, 0x08010400, 0x00000420, 0x00010000,
0x00010400, 0x08010020, 0x08000400, 0x00000420,
0x00000020, 0x00010420, 0x08010000, 0x08000020,
},
{
0x80000040, 0x00200040, 0x00000000, 0x80202000,
0x00200040, 0x00002000, 0x80002040, 0x00200000,
0x00002040, 0x80202040, 0x00202000, 0x80000000,
0x80002000, 0x80000040, 0x80200000, 0x00202040,
0x00200000, 0x80002040, 0x80200040, 0x00000000,
0x00002000, 0x00000040, 0x80202000, 0x80200040,
0x80202040, 0x80200000, 0x80000000, 0x00002040,
0x00000040, 0x00202000, 0x00202040, 0x80002000,
0x00002040, 0x80000000, 0x80002000, 0x00202040,
0x80202000, 0x00200040, 0x00000000, 0x80002000,
0x80000000, 0x00002000, 0x80200040, 0x00200000,
0x00200040, 0x80202040, 0x00202000, 0x00000040,
0x80202040, 0x00202000, 0x00200000, 0x80002040,
0x80000040, 0x80200000, 0x00202040, 0x00000000,
0x00002000, 0x80000040, 0x80002040, 0x80202000,
0x80200000, 0x00002040, 0x00000040, 0x80200040,
},
{
0x00004000, 0x00000200, 0x01000200, 0x01000004,
0x01004204, 0x00004004, 0x00004200, 0x00000000,
0x01000000, 0x01000204, 0x00000204, 0x01004000,
0x00000004, 0x01004200, 0x01004000, 0x00000204,
0x01000204, 0x00004000, 0x00004004, 0x01004204,
0x00000000, 0x01000200, 0x01000004, 0x00004200,
0x01004004, 0x00004204, 0x01004200, 0x00000004,
0x00004204, 0x01004004, 0x00000200, 0x01000000,
0x00004204, 0x01004000, 0x01004004, 0x00000204,
0x00004000, 0x00000200, 0x01000000, 0x01004004,
0x01000204, 0x00004204, 0x00004200, 0x00000000,
0x00000200, 0x01000004, 0x00000004, 0x01000200,
0x00000000, 0x01000204, 0x01000200, 0x00004200,
0x00000204, 0x00004000, 0x01004204, 0x01000000,
0x01004200, 0x00000004, 0x00004004, 0x01004204,
0x01000004, 0x01004200, 0x01004000, 0x00004004,
},
{
0x20800080, 0x20820000, 0x00020080, 0x00000000,
0x20020000, 0x00800080, 0x20800000, 0x20820080,
0x00000080, 0x20000000, 0x00820000, 0x00020080,
0x00820080, 0x20020080, 0x20000080, 0x20800000,
0x00020000, 0x00820080, 0x00800080, 0x20020000,
0x20820080, 0x20000080, 0x00000000, 0x00820000,
0x20000000, 0x00800000, 0x20020080, 0x20800080,
0x00800000, 0x00020000, 0x20820000, 0x00000080,
0x00800000, 0x00020000, 0x20000080, 0x20820080,
0x00020080, 0x20000000, 0x00000000, 0x00820000,
0x20800080, 0x20020080, 0x20020000, 0x00800080,
0x20820000, 0x00000080, 0x00800080, 0x20020000,
0x20820080, 0x00800000, 0x20800000, 0x20000080,
0x00820000, 0x00020080, 0x20020080, 0x20800000,
0x00000080, 0x20820000, 0x00820080, 0x00000000,
0x20000000, 0x20800080, 0x00020000, 0x00820080,
}
};
CONSTANT_VK u32a c_skb[8][64] =
{
{
0x00000000, 0x00000010, 0x20000000, 0x20000010,
0x00010000, 0x00010010, 0x20010000, 0x20010010,
0x00000800, 0x00000810, 0x20000800, 0x20000810,
0x00010800, 0x00010810, 0x20010800, 0x20010810,
0x00000020, 0x00000030, 0x20000020, 0x20000030,
0x00010020, 0x00010030, 0x20010020, 0x20010030,
0x00000820, 0x00000830, 0x20000820, 0x20000830,
0x00010820, 0x00010830, 0x20010820, 0x20010830,
0x00080000, 0x00080010, 0x20080000, 0x20080010,
0x00090000, 0x00090010, 0x20090000, 0x20090010,
0x00080800, 0x00080810, 0x20080800, 0x20080810,
0x00090800, 0x00090810, 0x20090800, 0x20090810,
0x00080020, 0x00080030, 0x20080020, 0x20080030,
0x00090020, 0x00090030, 0x20090020, 0x20090030,
0x00080820, 0x00080830, 0x20080820, 0x20080830,
0x00090820, 0x00090830, 0x20090820, 0x20090830,
},
{
0x00000000, 0x02000000, 0x00002000, 0x02002000,
0x00200000, 0x02200000, 0x00202000, 0x02202000,
0x00000004, 0x02000004, 0x00002004, 0x02002004,
0x00200004, 0x02200004, 0x00202004, 0x02202004,
0x00000400, 0x02000400, 0x00002400, 0x02002400,
0x00200400, 0x02200400, 0x00202400, 0x02202400,
0x00000404, 0x02000404, 0x00002404, 0x02002404,
0x00200404, 0x02200404, 0x00202404, 0x02202404,
0x10000000, 0x12000000, 0x10002000, 0x12002000,
0x10200000, 0x12200000, 0x10202000, 0x12202000,
0x10000004, 0x12000004, 0x10002004, 0x12002004,
0x10200004, 0x12200004, 0x10202004, 0x12202004,
0x10000400, 0x12000400, 0x10002400, 0x12002400,
0x10200400, 0x12200400, 0x10202400, 0x12202400,
0x10000404, 0x12000404, 0x10002404, 0x12002404,
0x10200404, 0x12200404, 0x10202404, 0x12202404,
},
{
0x00000000, 0x00000001, 0x00040000, 0x00040001,
0x01000000, 0x01000001, 0x01040000, 0x01040001,
0x00000002, 0x00000003, 0x00040002, 0x00040003,
0x01000002, 0x01000003, 0x01040002, 0x01040003,
0x00000200, 0x00000201, 0x00040200, 0x00040201,
0x01000200, 0x01000201, 0x01040200, 0x01040201,
0x00000202, 0x00000203, 0x00040202, 0x00040203,
0x01000202, 0x01000203, 0x01040202, 0x01040203,
0x08000000, 0x08000001, 0x08040000, 0x08040001,
0x09000000, 0x09000001, 0x09040000, 0x09040001,
0x08000002, 0x08000003, 0x08040002, 0x08040003,
0x09000002, 0x09000003, 0x09040002, 0x09040003,
0x08000200, 0x08000201, 0x08040200, 0x08040201,
0x09000200, 0x09000201, 0x09040200, 0x09040201,
0x08000202, 0x08000203, 0x08040202, 0x08040203,
0x09000202, 0x09000203, 0x09040202, 0x09040203,
},
{
0x00000000, 0x00100000, 0x00000100, 0x00100100,
0x00000008, 0x00100008, 0x00000108, 0x00100108,
0x00001000, 0x00101000, 0x00001100, 0x00101100,
0x00001008, 0x00101008, 0x00001108, 0x00101108,
0x04000000, 0x04100000, 0x04000100, 0x04100100,
0x04000008, 0x04100008, 0x04000108, 0x04100108,
0x04001000, 0x04101000, 0x04001100, 0x04101100,
0x04001008, 0x04101008, 0x04001108, 0x04101108,
0x00020000, 0x00120000, 0x00020100, 0x00120100,
0x00020008, 0x00120008, 0x00020108, 0x00120108,
0x00021000, 0x00121000, 0x00021100, 0x00121100,
0x00021008, 0x00121008, 0x00021108, 0x00121108,
0x04020000, 0x04120000, 0x04020100, 0x04120100,
0x04020008, 0x04120008, 0x04020108, 0x04120108,
0x04021000, 0x04121000, 0x04021100, 0x04121100,
0x04021008, 0x04121008, 0x04021108, 0x04121108,
},
{
0x00000000, 0x10000000, 0x00010000, 0x10010000,
0x00000004, 0x10000004, 0x00010004, 0x10010004,
0x20000000, 0x30000000, 0x20010000, 0x30010000,
0x20000004, 0x30000004, 0x20010004, 0x30010004,
0x00100000, 0x10100000, 0x00110000, 0x10110000,
0x00100004, 0x10100004, 0x00110004, 0x10110004,
0x20100000, 0x30100000, 0x20110000, 0x30110000,
0x20100004, 0x30100004, 0x20110004, 0x30110004,
0x00001000, 0x10001000, 0x00011000, 0x10011000,
0x00001004, 0x10001004, 0x00011004, 0x10011004,
0x20001000, 0x30001000, 0x20011000, 0x30011000,
0x20001004, 0x30001004, 0x20011004, 0x30011004,
0x00101000, 0x10101000, 0x00111000, 0x10111000,
0x00101004, 0x10101004, 0x00111004, 0x10111004,
0x20101000, 0x30101000, 0x20111000, 0x30111000,
0x20101004, 0x30101004, 0x20111004, 0x30111004,
},
{
0x00000000, 0x08000000, 0x00000008, 0x08000008,
0x00000400, 0x08000400, 0x00000408, 0x08000408,
0x00020000, 0x08020000, 0x00020008, 0x08020008,
0x00020400, 0x08020400, 0x00020408, 0x08020408,
0x00000001, 0x08000001, 0x00000009, 0x08000009,
0x00000401, 0x08000401, 0x00000409, 0x08000409,
0x00020001, 0x08020001, 0x00020009, 0x08020009,
0x00020401, 0x08020401, 0x00020409, 0x08020409,
0x02000000, 0x0A000000, 0x02000008, 0x0A000008,
0x02000400, 0x0A000400, 0x02000408, 0x0A000408,
0x02020000, 0x0A020000, 0x02020008, 0x0A020008,
0x02020400, 0x0A020400, 0x02020408, 0x0A020408,
0x02000001, 0x0A000001, 0x02000009, 0x0A000009,
0x02000401, 0x0A000401, 0x02000409, 0x0A000409,
0x02020001, 0x0A020001, 0x02020009, 0x0A020009,
0x02020401, 0x0A020401, 0x02020409, 0x0A020409,
},
{
0x00000000, 0x00000100, 0x00080000, 0x00080100,
0x01000000, 0x01000100, 0x01080000, 0x01080100,
0x00000010, 0x00000110, 0x00080010, 0x00080110,
0x01000010, 0x01000110, 0x01080010, 0x01080110,
0x00200000, 0x00200100, 0x00280000, 0x00280100,
0x01200000, 0x01200100, 0x01280000, 0x01280100,
0x00200010, 0x00200110, 0x00280010, 0x00280110,
0x01200010, 0x01200110, 0x01280010, 0x01280110,
0x00000200, 0x00000300, 0x00080200, 0x00080300,
0x01000200, 0x01000300, 0x01080200, 0x01080300,
0x00000210, 0x00000310, 0x00080210, 0x00080310,
0x01000210, 0x01000310, 0x01080210, 0x01080310,
0x00200200, 0x00200300, 0x00280200, 0x00280300,
0x01200200, 0x01200300, 0x01280200, 0x01280300,
0x00200210, 0x00200310, 0x00280210, 0x00280310,
0x01200210, 0x01200310, 0x01280210, 0x01280310,
},
{
0x00000000, 0x04000000, 0x00040000, 0x04040000,
0x00000002, 0x04000002, 0x00040002, 0x04040002,
0x00002000, 0x04002000, 0x00042000, 0x04042000,
0x00002002, 0x04002002, 0x00042002, 0x04042002,
0x00000020, 0x04000020, 0x00040020, 0x04040020,
0x00000022, 0x04000022, 0x00040022, 0x04040022,
0x00002020, 0x04002020, 0x00042020, 0x04042020,
0x00002022, 0x04002022, 0x00042022, 0x04042022,
0x00000800, 0x04000800, 0x00040800, 0x04040800,
0x00000802, 0x04000802, 0x00040802, 0x04040802,
0x00002800, 0x04002800, 0x00042800, 0x04042800,
0x00002802, 0x04002802, 0x00042802, 0x04042802,
0x00000820, 0x04000820, 0x00040820, 0x04040820,
0x00000822, 0x04000822, 0x00040822, 0x04040822,
0x00002820, 0x04002820, 0x00042820, 0x04042820,
0x00002822, 0x04002822, 0x00042822, 0x04042822
}
};
#if VECT_SIZE == 1
#define BOX(i,n,S) (S)[(n)][(i)]
#elif VECT_SIZE == 2
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
#elif VECT_SIZE == 4
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
#elif VECT_SIZE == 8
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
#elif VECT_SIZE == 16
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
DECLSPEC void _des_crypt_encrypt (u32 *iv, u32 *data, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_SPtrans)[64])
{
u32 r = data[0];
u32 l = data[1];
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i += 2)
{
u32 u;
u32 t;
u = Kc[i + 0] ^ hc_rotl32 (r, 30u);
t = Kd[i + 0] ^ hc_rotl32 (r, 26u);
l ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans)
| BOX (((u >> 8) & 0x3f), 2, s_SPtrans)
| BOX (((u >> 16) & 0x3f), 4, s_SPtrans)
| BOX (((u >> 24) & 0x3f), 6, s_SPtrans)
| BOX (((t >> 0) & 0x3f), 1, s_SPtrans)
| BOX (((t >> 8) & 0x3f), 3, s_SPtrans)
| BOX (((t >> 16) & 0x3f), 5, s_SPtrans)
| BOX (((t >> 24) & 0x3f), 7, s_SPtrans);
u = Kc[i + 1] ^ hc_rotl32 (l, 30u);
t = Kd[i + 1] ^ hc_rotl32 (l, 26u);
r ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans)
| BOX (((u >> 8) & 0x3f), 2, s_SPtrans)
| BOX (((u >> 16) & 0x3f), 4, s_SPtrans)
| BOX (((u >> 24) & 0x3f), 6, s_SPtrans)
| BOX (((t >> 0) & 0x3f), 1, s_SPtrans)
| BOX (((t >> 8) & 0x3f), 3, s_SPtrans)
| BOX (((t >> 16) & 0x3f), 5, s_SPtrans)
| BOX (((t >> 24) & 0x3f), 7, s_SPtrans);
}
iv[0] = l;
iv[1] = r;
}
DECLSPEC void _des_crypt_keysetup (u32 c, u32 d, u32 *Kc, u32 *Kd, SHM_TYPE u32 (*s_skb)[64])
{
u32 tt;
PERM_OP (d, c, tt, 4, 0x0f0f0f0f);
HPERM_OP (c, tt, 2, 0xcccc0000);
HPERM_OP (d, tt, 2, 0xcccc0000);
PERM_OP (d, c, tt, 1, 0x55555555);
PERM_OP (c, d, tt, 8, 0x00ff00ff);
PERM_OP (d, c, tt, 1, 0x55555555);
d = ((d & 0x000000ff) << 16)
| ((d & 0x0000ff00) << 0)
| ((d & 0x00ff0000) >> 16)
| ((c & 0xf0000000) >> 4);
c = c & 0x0fffffff;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 16; i++)
{
if ((i < 2) || (i == 8) || (i == 15))
{
c = ((c >> 1) | (c << 27));
d = ((d >> 1) | (d << 27));
}
else
{
c = ((c >> 2) | (c << 26));
d = ((d >> 2) | (d << 26));
}
c = c & 0x0fffffff;
d = d & 0x0fffffff;
const u32 c00 = (c >> 0) & 0x0000003f;
const u32 c06 = (c >> 6) & 0x00383003;
const u32 c07 = (c >> 7) & 0x0000003c;
const u32 c13 = (c >> 13) & 0x0000060f;
const u32 c20 = (c >> 20) & 0x00000001;
u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb)
| BOX (((c06 >> 0) & 0xff)
|((c07 >> 0) & 0xff), 1, s_skb)
| BOX (((c13 >> 0) & 0xff)
|((c06 >> 8) & 0xff), 2, s_skb)
| BOX (((c20 >> 0) & 0xff)
|((c13 >> 8) & 0xff)
|((c06 >> 16) & 0xff), 3, s_skb);
const u32 d00 = (d >> 0) & 0x00003c3f;
const u32 d07 = (d >> 7) & 0x00003f03;
const u32 d21 = (d >> 21) & 0x0000000f;
const u32 d22 = (d >> 22) & 0x00000030;
u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb)
| BOX (((d07 >> 0) & 0xff)
|((d00 >> 8) & 0xff), 5, s_skb)
| BOX (((d07 >> 8) & 0xff), 6, s_skb)
| BOX (((d21 >> 0) & 0xff)
|((d22 >> 0) & 0xff), 7, s_skb);
Kc[i] = ((t << 16) | (s & 0x0000ffff));
Kd[i] = ((s >> 16) | (t & 0xffff0000));
}
}
DECLSPEC void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 *out)
{
u32 t[8];
t[0] = (w0 >> 0) & 0xff;
t[1] = (w0 >> 8) & 0xff;
t[2] = (w0 >> 16) & 0xff;
t[3] = (w0 >> 24) & 0xff;
t[4] = (w1 >> 0) & 0xff;
t[5] = (w1 >> 8) & 0xff;
t[6] = (w1 >> 16) & 0xff;
t[7] = (w1 >> 24) & 0xff;
u32 k[8];
k[0] = (t[0] >> 0);
k[1] = (t[0] << 7) | (t[1] >> 1);
k[2] = (t[1] << 6) | (t[2] >> 2);
k[3] = (t[2] << 5) | (t[3] >> 3);
k[4] = (t[3] << 4) | (t[4] >> 4);
k[5] = (t[4] << 3) | (t[5] >> 5);
k[6] = (t[5] << 2) | (t[6] >> 6);
k[7] = (t[6] << 1);
out[0] = ((k[0] & 0xff) << 0)
| ((k[1] & 0xff) << 8)
| ((k[2] & 0xff) << 16)
| ((k[3] & 0xff) << 24);
out[1] = ((k[4] & 0xff) << 0)
| ((k[5] & 0xff) << 8)
| ((k[6] & 0xff) << 16)
| ((k[7] & 0xff) << 24);
}
#ifdef KERNEL_STATIC
DECLSPEC u8 hex_convert (const u8 c)
{
return (c & 15) + (c >> 6) * 9;
}
DECLSPEC u8 hex_to_u8 (const u8 *hex)
{
u8 v = 0;
v |= ((u8) hex_convert (hex[1]) << 0);
v |= ((u8) hex_convert (hex[0]) << 4);
return (v);
}
#endif
typedef struct netntlm
{
u32 user_len;
u32 domain_len;
u32 srvchall_len;
u32 clichall_len;
u32 userdomain_buf[64];
u32 chall_buf[256];
} netntlm_t;
typedef struct netntlm_tmp
{
u32 digest_buf[4];
} netntlm_tmp_t;
KERNEL_FQ void m27000_init (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
if (gid >= gid_max) return;
/**
* salt
*/
u32 in[16];
in[ 0] = pws[gid].i[ 0];
in[ 1] = pws[gid].i[ 1];
in[ 2] = pws[gid].i[ 2];
in[ 3] = pws[gid].i[ 3];
in[ 4] = pws[gid].i[ 4];
in[ 5] = pws[gid].i[ 5];
in[ 6] = pws[gid].i[ 6];
in[ 7] = pws[gid].i[ 7];
u8 *in_ptr = (u8 *) in;
u32 out[4];
u8 *out_ptr = (u8 *) out;
for (int i = 0, j = 0; i < 16; i += 1, j += 2)
{
out_ptr[i] = hex_to_u8 (in_ptr + j);
}
tmps[gid].digest_buf[0] = out[ 0];
tmps[gid].digest_buf[1] = out[ 1];
tmps[gid].digest_buf[2] = out[ 2];
tmps[gid].digest_buf[3] = out[ 3];
}
KERNEL_FQ void m27000_loop (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
}
KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
const u64 lsz = get_local_size (0);
/**
* sbox, kbox
*/
#ifdef REAL_SHM
LOCAL_VK u32 s_SPtrans[8][64];
LOCAL_VK u32 s_skb[8][64];
for (u32 i = lid; i < 64; i += lsz)
{
s_SPtrans[0][i] = c_SPtrans[0][i];
s_SPtrans[1][i] = c_SPtrans[1][i];
s_SPtrans[2][i] = c_SPtrans[2][i];
s_SPtrans[3][i] = c_SPtrans[3][i];
s_SPtrans[4][i] = c_SPtrans[4][i];
s_SPtrans[5][i] = c_SPtrans[5][i];
s_SPtrans[6][i] = c_SPtrans[6][i];
s_SPtrans[7][i] = c_SPtrans[7][i];
s_skb[0][i] = c_skb[0][i];
s_skb[1][i] = c_skb[1][i];
s_skb[2][i] = c_skb[2][i];
s_skb[3][i] = c_skb[3][i];
s_skb[4][i] = c_skb[4][i];
s_skb[5][i] = c_skb[5][i];
s_skb[6][i] = c_skb[6][i];
s_skb[7][i] = c_skb[7][i];
}
SYNC_THREADS ();
#else
CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans;
CONSTANT_AS u32a (*s_skb)[64] = c_skb;
#endif
if (gid >= gid_max) return;
/**
* base
*/
const u32 s0 = salt_bufs[SALT_POS].salt_buf[0];
const u32 s1 = salt_bufs[SALT_POS].salt_buf[1];
const u32 s2 = salt_bufs[SALT_POS].salt_buf[2];
const u32 a = tmps[gid].digest_buf[0];
const u32 b = tmps[gid].digest_buf[1];
const u32 c = tmps[gid].digest_buf[2];
const u32 d = tmps[gid].digest_buf[3];
// I believe this matches the last 2 bytes and throws away.
// Taken from 5500.
if ((d >> 16) != s2) return;
/**
* DES1
*/
u32 key[2];
transform_netntlmv1_key (a, b, key);
u32 Kc[16];
u32 Kd[16];
_des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);
u32 data[2];
data[0] = s0;
data[1] = s1;
u32 out1[2];
_des_crypt_encrypt (out1, data, Kc, Kd, s_SPtrans);
/**
* DES2
*/
transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key);
_des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb);
u32 out2[2];
_des_crypt_encrypt (out2, data, Kc, Kd, s_SPtrans);
/**
* digest
*/
const u32 r0 = out1[0];
const u32 r1 = out1[1];
const u32 r2 = out2[0];
const u32 r3 = out2[1];
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}

@ -0,0 +1,197 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
// #define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md4.cl"
#include "inc_hash_md5.cl"
#endif
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#ifdef KERNEL_STATIC
DECLSPEC u8 hex_convert (const u8 c)
{
return (c & 15) + (c >> 6) * 9;
}
DECLSPEC u8 hex_to_u8 (const u8 *hex)
{
u8 v = 0;
v |= ((u8) hex_convert (hex[1]) << 0);
v |= ((u8) hex_convert (hex[0]) << 4);
return (v);
}
#endif
typedef struct netntlm
{
u32 user_len;
u32 domain_len;
u32 srvchall_len;
u32 clichall_len;
u32 userdomain_buf[64];
u32 chall_buf[256];
} netntlm_t;
typedef struct netntlmv2_tmp
{
u32 digest_buf[4];
} netntlm_tmp_t;
KERNEL_FQ void m27100_init (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
u32 in[16];
in[ 0] = pws[gid].i[ 0];
in[ 1] = pws[gid].i[ 1];
in[ 2] = pws[gid].i[ 2];
in[ 3] = pws[gid].i[ 3];
in[ 4] = pws[gid].i[ 4];
in[ 5] = pws[gid].i[ 5];
in[ 6] = pws[gid].i[ 6];
in[ 7] = pws[gid].i[ 7];
u8 *in_ptr = (u8 *) in;
u32 out[4];
u8 *out_ptr = (u8 *) out;
for (int i = 0, j = 0; i < 16; i += 1, j += 2)
{
out_ptr[i] = hex_to_u8 (in_ptr + j);
}
tmps[gid].digest_buf[0] = out[ 0];
tmps[gid].digest_buf[1] = out[ 1];
tmps[gid].digest_buf[2] = out[ 2];
tmps[gid].digest_buf[3] = out[ 3];
}
KERNEL_FQ void m27100_loop (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
}
KERNEL_FQ void m27100_comp (KERN_ATTR_TMPS_ESALT (netntlm_tmp_t, netntlm_t))
{
/**
* modifier
*/
const u64 gid = get_global_id (0);
if (gid >= gid_max) return;
const u64 lid = get_local_id (0);
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
w0[0] = tmps[gid].digest_buf[0];
w0[1] = tmps[gid].digest_buf[1];
w0[2] = tmps[gid].digest_buf[2];
w0[3] = tmps[gid].digest_buf[3];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_hmac_ctx_t ctx0;
md5_hmac_init_64 (&ctx0, w0, w1, w2, w3);
md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, esalt_bufs[DIGESTS_OFFSET].user_len + esalt_bufs[DIGESTS_OFFSET].domain_len);
md5_hmac_final (&ctx0);
w0[0] = ctx0.opad.h[0];
w0[1] = ctx0.opad.h[1];
w0[2] = ctx0.opad.h[2];
w0[3] = ctx0.opad.h[3];
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
w2[0] = 0;
w2[1] = 0;
w2[2] = 0;
w2[3] = 0;
w3[0] = 0;
w3[1] = 0;
w3[2] = 0;
w3[3] = 0;
md5_hmac_ctx_t ctx;
md5_hmac_init_64 (&ctx, w0, w1, w2, w3);
md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len);
md5_hmac_final (&ctx);
tmps[gid].digest_buf[0] = ctx.opad.h[0];
tmps[gid].digest_buf[1] = ctx.opad.h[1];
tmps[gid].digest_buf[2] = ctx.opad.h[2];
tmps[gid].digest_buf[3] = ctx.opad.h[3];
/**
* digest
*/
const u32 r0 = ctx.opad.h[DGST_R0];
const u32 r1 = ctx.opad.h[DGST_R1];
const u32 r2 = ctx.opad.h[DGST_R2];
const u32 r3 = ctx.opad.h[DGST_R3];
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}

@ -4,44 +4,72 @@
## Features
##
- Added option --multiply-accel-disable (short: -M) to disable multiply the kernel-accel with the multiprocessor count automatism
- Added option --multiply-accel-disable (short: -M) to disable multiplying of the kernel accel with the multiprocessor count
- Added rule function '3' to change the case of the first letter after the occurrence of N of character X
- Added support for auto tuning --kernel-threads (-T) at startup
- Added support for HIP version 4.3 or later and removed support for older HIP versions as they are not compatible
##
## Bugs
##
- Fixed autotune unitialized tmps variable for slow hashes by calling _init kernel before calling _loop kernel
- Fixed datatype in function sha384_hmac_init_vector_128() that could come into effect if vector datatype was manually set
- Fixed false negative in all VeraCrypt hash-modes if both conditions are met: 1. use CPU for cracking and 2. PIM range was used
- Fixed buffer overflow in DPAPI masterkey file v1/v2 module in hash_encode() and hash_decode()
- Fixed buffer overflow in Stargazer Stellar Wallet XLM module in hash_encode() when a hash was cracked
- Fixed false negative in all VeraCrypt hash modes if both conditions are met: 1. Use CPU for cracking and 2. PIM area was used
- Fixed invalid data type in the sha384_hmac_init_vector_128() function that take effect if the vector data type was specified manually
- Fixed out-of-boundary read in input_tokenizer() if the signature in the hash is longer than the length of the plugin's signature constant
- Fixed out-of-boundary read in the Stuffit5 module in hash_decode()
- Fixed random rule generator option --generate-rules-func-min by fixing switch() case to not select a not existing option group type
- Fixed uninitialized tmps variable in autotune for slow hashes by calling _init and _prepare kernel before calling _loop kernel
##
## Improvements
## Performance
##
- AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
- Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
- Blowfish Kernels: Backport optimizations reducing bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1
- Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
- Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
- Shared Memory: Calculate kernel dynamic memory size based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
- Slow Kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE
- AMD GPUs: Add inline assembly code for md5crypt, sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
- AMD GPUs: On the Apple OpenCL platform, we ask for the preferred kernel thread size rather than hard-coding 32
- Backend Interface: Replace most of the blocking Compute API functions with asynchronous ones to improve GPU utilization
- Blake Kernels: Optimize 3/4 BLAKE2B_ROUND() 64-bit rotations with inline assembly hc_byte_perm_S() calls
- Blowfish Kernels: Backport optimizations to reduce bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1
- ECC secp256k1: The inline assembly code for AMD GPUs has been removed as the latest JIT compilers optimize it with the same efficiency
- HIP Kernels: Enable vector data types for HIP kernels for functionality and performance
- Kernel threads: Use warp size / wavefront size query instead of hard-coded values as the basis for kernel threads
- SCRYPT Kernels: Improve Hashcat.hctune entries for many NV and AMD GPUs for hash mode 8900, 9300, 15700 and 22700
- Tuning Database: Add new module function module_extra_tuningdb_block() to extend hashcat.hctune content from a module
##
## Technical
##
- ADL: Updated support for AMD Display Library to 14.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
- Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
- HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows
- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
- SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs
- 7-Zip Hook: Increase the supported data length from 320kb to 8mb
- ADL: Updated support for AMD Display Library to 15.0, updated data types
- AMD Driver: Updated requirements for AMD Linux drivers to ROCm 4.3 or later due to new HIP interface
- AMD Driver: Updated requirements for AMD Windows drivers to Adrenalin 21.2.1 or later due to new ADL library
- Backend Interface: Implement gpu_bzero() as a gpu_memset() replacement, since all gpu_memset() operations used 0 as the value
- Backend Interface: Improve the query kernel's dynamic memory size based on DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN instead of BF
- Brain Session: Adds hashconfig-specific opti_type and opts_type parameters to the session calculation to enable cover functions like -O
- Commandline: Throw an error if the separator specified by the user with the -p option is not exactly 1 byte
- Constants: Make const char * pointers actually const char * const pointers
- Filehandling: Use const char for fopen mode to fix -Wwrite-strings warnings
- Hardware Monitor: Added support for OverDrive 7 and 8 based GPUs
- HIP Kernels: Dependency on hip/hip runtime.h has been removed to enable easier integration of the HIP backend under Windows
- Kernel cache: Add kernel threads for hash calculation, which will later be used in the file name of the kernel cache
- Memory Management: Refactored the code responsible for limiting kernel accel with the goal to avoid low host memory situations
- OpenCL Runtime: Workaround for Intel OpenCL runtime: segmentation fault when compiling hc_enc_next() / hc_enc_next_global()
- RC4 Kernels: Use improved native thread derivation for RC4-based hash modes 7500, 13100, 18200, 25400
- Shared Memory: Calculate the dynamic memory size of the kernel based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
- Slow kernels: Set some of the slowest kernels to OPTS_TYPE_MP_MULTI_DISABLE to make it easier to handle small word lists
- Vendor Discovery: Add "Intel" as a valid vendor name for GPUs on macOS
##
## Algorithms
##
- Added hash-mode: SNMPv3 HMAC-SHA1-96
- Added hash-mode: SNMPv3 HMAC-MD5-96/HMAC-SHA1-96
- Added hash-mode: SNMPv3 HMAC-MD5-96
- Added hash-mode: SNMPv3 HMAC-SHA1-96
- Added hash-mode: SNMPv3 HMAC-SHA224-128
- Added hash-mode: SNMPv3 HMAC-SHA256-192
* changes v6.2.2 -> v6.2.3

@ -10,8 +10,8 @@
hashcat v6.2.3
==============
AMD GPUs on Linux require "RadeonOpenCompute (ROCm)" Software Platform (3.1 or later)
AMD GPUs on Windows require "AMD Radeon Adrenalin 2020 Edition" (20.2.2 or later)
AMD GPUs on Linux require "AMD ROCm" (4.3 or later)
AMD GPUs on Windows require "AMD Radeon Adrenalin 2020 Edition" (21.2.1 or later)
Intel CPUs require "OpenCL Runtime for Intel Core and Intel Xeon Processors" (16.1.1 or later)
NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or later)
@ -155,8 +155,11 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
- SIP digest authentication (MD5)
- IKE-PSK MD5
- IKE-PSK SHA1
- SNMPv3 HMAC-MD5-96/HMAC-SHA1-96
- SNMPv3 HMAC-MD5-96
- SNMPv3 HMAC-SHA1-96
- SNMPv3 HMAC-SHA256-192
- SNMPv3 HMAC-SHA224-128
- WPA-EAPOL-PBKDF2
- WPA-EAPOL-PMK
- WPA-PBKDF2-PMKID+EAPOL

@ -5,6 +5,7 @@
#define RULE_OP_MANGLE_UREST_LFIRST 'C' // upper case all chars, lower case 1st
#define RULE_OP_MANGLE_TREST 't' // switch the case of each char
#define RULE_OP_MANGLE_TOGGLE_AT 'T' // switch the case of each char on pos N
#define RULE_OP_MANGLE_TOGGLE_AT_SEP '3' // switch the case of the first letter after occurrence N of char X
#define RULE_OP_MANGLE_REVERSE 'r' // reverse word
#define RULE_OP_MANGLE_DUPEWORD 'd' // append word to itself
#define RULE_OP_MANGLE_DUPEWORD_TIMES 'p' // append word to itself N times

@ -279,7 +279,14 @@ GeForce_RTX_3090 ALIAS_nv_sm50_or_higher
##
Device_738c ALIAS_AMD_MI100
AMD_Radeon_(TM)_RX_480_Graphics ALIAS_AMD_RX480
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] ALIAS_AMD_Vega64
AMD_Radeon_Vega_64 ALIAS_AMD_Vega64
Device_73bf ALIAS_AMD_RX6900XT
AMD_Radeon_RX_6900_XT ALIAS_AMD_RX6900XT
#############
## ENTRIES ##
@ -379,164 +386,13 @@ GeForce_GTX_TITAN 3 9900 2 A
DEVICE_TYPE_CPU * 3200 1 N A
##
## SCRYPT
## SCRYPT: Tunings for SCRYPT based hash-modes can be found inside the plugin source
## See function module_extra_tuningdb_block()
##
DEVICE_TYPE_CPU * 8900 1 N A
DEVICE_TYPE_CPU * 9300 1 N A
DEVICE_TYPE_CPU * 15700 1 N A
DEVICE_TYPE_CPU * 22700 1 N A
DEVICE_TYPE_GPU * 8900 1 N A
DEVICE_TYPE_GPU * 9300 1 N A
DEVICE_TYPE_GPU * 15700 1 1 A
DEVICE_TYPE_GPU * 22700 1 N A
##
## CryptoAPI
##
DEVICE_TYPE_CPU * 14500 1 A A
DEVICE_TYPE_GPU * 14500 1 A A
## Here's an example of how to manually tune SCRYPT algorithm kernels for your hardware.
## Manually tuning the GPU will yield increased performance. There is typically no noticeable change to CPU performance.
##
## First, you need to know the parameters of your SCRYPT hash: N, r and p.
##
## The reference SCRYPT parameter values are N=14, r=8 and p=1, but these will likely not match the parameters used by real-world applications.
## For reference, the N value represents an exponent (2^N, which we calculate by bit shifting 1 left by N bits).
## Hashcat expects this N value in decimal format: 1 << 14 = 16384
##
## Now that you have the 3 configuration items in decimal format, multiply them by 128 (underlaying crypto primitive block size).
## For example: 128 * 16384 * 8 * 1 = 16777216 = 16MB
## This is the amount of memory required for the GPU to compute the hash of one password candidate.
##
## Hashcat computes multiple password candidates in parallel - this is what allows for full utilization of the device.
## The number of password candidates that Hashcat can run in parallel is VRAM limited and depends on:
##
## 1. Compute devices' native compute units
## 2. Compute devices' native thread count
## 3. Artificial multiplier (--kernel-accel aka -n)
##
## In order to find these values:
##
## 1. On startup Hashcat will show: * Device #1: GeForce GTX 980, 3963/4043 MB, 16MCU. The 16 MCU is the number of compute units on that device.
## 2. Native thread counts are fixed values: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefronts), GPU-NVIDIA=32 (warps)
##
## Now multiply them together. For my GTX980: 16 * 32 * 16777216 = 8589934592 = 8GB
##
## If we want to actually make use of all computing resources, this GPU would require 8GB of GPU RAM.
## However, it doesn't have that:
##
## Device #1: GeForce GTX 980, 3963/4043 MB, 16MCU. We only have 4043 MB (4GB minus some overhead from the OS).
##
## How do we deal with this? This is where SCRYPT TMTO(time-memory trde off) kicks in. The SCRYPT algorithm is designed in such a way that we
## can pre-compute that 16MB buffer from a self-choosen offset. Details on how this actually works are not important for this process.
##
## What's relevant to us is that we can halve the buffer size, but we pay with twice the computation time.
## We can repeat this as often as we want. That's why it's a trade-off.
##
## This mechanic can be manually set using --scrypt-tmto on the commandline, but this is not the best way.
##
## Back to our problem. We need 8GB of memory but have only ~4GB.
## It's not a full 4GB. The OS needs some of it and Hashcat needs some of it to store password candidates and other things.
## If you run a headless server it should be safe to subtract a fixed value of 200MB from whatever you have in your GPU.
##
## So lets divide our required memory(8GB) by 2 until it fits in our VRAM - 200MB.
##
## (8GB >> 0) = 8GB < 3.8GB = No, Does not fit
## (8GB >> 1) = 4GB < 3.8GB = No, Does not fit
## (8GB >> 2) = 2GB < 3.8GB = Yes!
##
## This process is automated in Hashcat, but it is important to understand what's happening here.
## Because of the light overhead from the OS and Hashcat, we pay a very high price.
## Even though it is just 200MB, it forces us to increase the TMTO by another step.
## In terms of speed, the speed is now only 1/4 of what we could archieve on that same GPU if it had only 8.2GB ram.
## But now we end up in a situation that we waste 1.8GB RAM which costs us ((1.8GB/16MB)>>1) candidates/second.
##
## This is where manual tuning can come into play.
## If we know that the resources we need are close to what we have (in this case 3.8GB <-> 4.0GB)
## We could decide to throw away some of our compute units so that we will no longer need 4.0GB but only 3.8GB.
## Therefore, we do not need to increase the TMTO by another step to fit in VRAM.
##
## If we cut down our 16 MCU to only 15 MCU or 14 MCU using --kernel-accel(-n), we end up with:
##
## 16 * 32 * 16777216 = 8589934592 / 2 = 4294967296 = 4.00GB < 3.80GB = Nope, next
## 15 * 32 * 16777216 = 8053063680 / 2 = 4026531840 = 3.84GB < 3.80GB = Nope, next
## 14 * 32 * 16777216 = 7516192768 / 2 = 3758096384 = 3.58GB < 3.80GB = Yes!
##
## So we can throw away 2/16 compute units, but save half of the computation trade-off on the rest of the compute device.
## On my GTX980, this improves the performance from 163 H/s to 201 H/s.
## You don't need to control --scrypt-tmto manually because now that the multiplier (-n) is smaller than the native value
## Hashcat will automatically realize it can decrease the TMTO by one.
##
## At this point, you found the optimal base value for your compute device. In this case: 14.
##
## Depending on our hardware, especially hardware with very slow memory access like a GPU
## there's a good chance that it's cheaper (faster) to compute an extra step on the GPU register.
## So if we increase the TMTO again by one, this gives an extra speed boost.
##
## On my GTX980, this improves the performance from 201 H/s to 255 H/s.
## Again, there's no need to control this with --scrypt-tmto. Hashcat will realize it has to increase the TMTO again.
##
## All together, you can control all of this by using the -n parameter in the command line.
## This is not ideal in a production environment because you must use the --force flag.
## The best way to set this is by using this Hashcat.hctune file to store it. This avoids the need to bypass any warnings.
##
## Find the ideal -n value, then store it here along with the proper compute device name.
## Formatting guidelines are availabe at the top of this document.
## 4GB
GeForce_GTX_980 * 8900 1 28 A
GeForce_GTX_980 * 9300 1 128 A
GeForce_GTX_980 * 15700 1 28 A
GeForce_GTX_980 * 22700 1 28 A
## 8GB
GeForce_GTX_1080 * 8900 1 14 A
GeForce_GTX_1080 * 9300 1 256 A
GeForce_GTX_1080 * 15700 1 14 A
GeForce_GTX_1080 * 22700 1 14 A
## 11GB
GeForce_RTX_2080_Ti * 8900 1 68 A
GeForce_RTX_2080_Ti * 9300 1 532 A
GeForce_RTX_2080_Ti * 15700 1 68 A
GeForce_RTX_2080_Ti * 22700 1 68 A
## 8GB
GeForce_RTX_3060_Ti * 8900 1 51 A
GeForce_RTX_3060_Ti * 9300 1 256 A
GeForce_RTX_3060_Ti * 15700 1 11 A
GeForce_RTX_3060_Ti * 22700 1 43 A
## 8GB
GeForce_RTX_3070 * 8900 1 46 A
GeForce_RTX_3070 * 9300 1 368 A
GeForce_RTX_3070 * 15700 1 22 A
GeForce_RTX_3070 * 22700 1 46 A
## 4GB
AMD_Radeon_(TM)_RX_480_Graphics * 8900 1 14 A
AMD_Radeon_(TM)_RX_480_Graphics * 9300 1 126 A
AMD_Radeon_(TM)_RX_480_Graphics * 15700 1 14 A
AMD_Radeon_(TM)_RX_480_Graphics * 22700 1 14 A
## 8GB
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 8900 1 28 A
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 9300 1 442 A
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 15700 1 28 A
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 22700 1 28 A
## 32GB, WF64
ALIAS_AMD_MI100 * 8900 1 76 A
ALIAS_AMD_MI100 * 9300 1 288 A
ALIAS_AMD_MI100 * 15700 1 76 A
ALIAS_AMD_MI100 * 22700 1 76 A
## 16GB, WF32
ALIAS_AMD_RX6900XT * 8900 1 62 A
ALIAS_AMD_RX6900XT * 9300 1 704 A
ALIAS_AMD_RX6900XT * 15700 1 62 A
ALIAS_AMD_RX6900XT * 22700 1 62 A

@ -69,13 +69,12 @@ int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc,
int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
int hc_cuMemAlloc (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
int hc_cuMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
int hc_cuMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
int hc_cuMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
int hc_cuMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
int hc_cuMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr);
int hc_cuMemsetD32Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
int hc_cuMemsetD8Async (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
@ -89,53 +88,50 @@ int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state,
int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state);
int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut);
int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options);
int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet);
int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log);
int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor);
int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev);
int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config);
int hc_hipCreateProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
int hc_hipDestroyProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog);
int hc_hipCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const char * const *options);
int hc_hipGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet);
int hc_hipGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log);
int hc_hipGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *codeSizeRet);
int hc_hipGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *code);
int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx);
int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx);
int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx);
int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx);
int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx);
int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev);
int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, hipDevice_t *device, int ordinal);
int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count);
int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal);
int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev);
int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev);
int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, hipDevice_t dev);
int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, hipDevice_t dev);
int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags);
int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd);
int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream);
int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc);
//int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value);
int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent);
int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent);
int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream);
int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent);
int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc);
int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra);
int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize);
int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount);
int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount);
int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr);
int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name);
int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues);
int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod);
int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags);
int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx);
int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut);
int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues);
int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state);
int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **cubinOut, size_t *sizeOut);
int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra);
int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize);
int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dptr);
int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
int hc_hipMemsetD32Async (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N, hipStream_t hStream);
int hc_hipMemsetD8Async (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N, hipStream_t hStream);
int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name);
int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name);
int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, hipModule_t hmod);
int hc_hipRuntimeGetVersion (hashcat_ctx_t *hashcat_ctx, int *runtimeVersion);
int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, hipStream_t *phStream, unsigned int Flags);
int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream);
int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream);
int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
int hc_clCompileProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
@ -146,6 +142,7 @@ int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program
int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_program *program);
int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program);
int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
int hc_clEnqueueFillBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf);
int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
int hc_clEnqueueReadBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event);
@ -181,17 +178,20 @@ void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 p
int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size);
int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size);
int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size);
int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size);
int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num);
int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u8 value, const u64 size);
int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u32 value, const u64 size);
int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size);
int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u8 value, const u64 size);
int run_opencl_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 offset, const u32 value, const u64 size);
int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);

@ -63,6 +63,14 @@
#define HC_API_CALL
#endif
#if defined (__GNUC__)
#define HC_ALIGN(x) __attribute__((aligned(x)))
#elif defined (_MSC_VER)
#define HC_ALIGN(x) __declspec(align(x))
#else
#define HC_ALIGN(x)
#endif
#if defined (_WIN)
#define WIN32_LEAN_AND_MEAN
#endif

@ -13,22 +13,120 @@
#include <windows.h>
#endif // _WIN
// Values taken from display-library-14.0.zip
// Declarations from:
// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_defines.h
// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_structures.h
/**
* Declarations from adl_defines.h
*/
/// Defines ADL_TRUE
#define ADL_TRUE 1
/// Defines ADL_FALSE
#define ADL_FALSE 0
//Define Performance Metrics Log max sensors number
#define ADL_PMLOG_MAX_SENSORS 256
typedef enum ADLSensorType
{
SENSOR_MAXTYPES = 0,
PMLOG_CLK_GFXCLK = 1,
PMLOG_CLK_MEMCLK = 2,
PMLOG_CLK_SOCCLK = 3,
PMLOG_CLK_UVDCLK1 = 4,
PMLOG_CLK_UVDCLK2 = 5,
PMLOG_CLK_VCECLK = 6,
PMLOG_CLK_VCNCLK = 7,
PMLOG_TEMPERATURE_EDGE = 8,
PMLOG_TEMPERATURE_MEM = 9,
PMLOG_TEMPERATURE_VRVDDC = 10,
PMLOG_TEMPERATURE_VRMVDD = 11,
PMLOG_TEMPERATURE_LIQUID = 12,
PMLOG_TEMPERATURE_PLX = 13,
PMLOG_FAN_RPM = 14,
PMLOG_FAN_PERCENTAGE = 15,
PMLOG_SOC_VOLTAGE = 16,
PMLOG_SOC_POWER = 17,
PMLOG_SOC_CURRENT = 18,
PMLOG_INFO_ACTIVITY_GFX = 19,
PMLOG_INFO_ACTIVITY_MEM = 20,
PMLOG_GFX_VOLTAGE = 21,
PMLOG_MEM_VOLTAGE = 22,
PMLOG_ASIC_POWER = 23,
PMLOG_TEMPERATURE_VRSOC = 24,
PMLOG_TEMPERATURE_VRMVDD0 = 25,
PMLOG_TEMPERATURE_VRMVDD1 = 26,
PMLOG_TEMPERATURE_HOTSPOT = 27,
PMLOG_TEMPERATURE_GFX = 28,
PMLOG_TEMPERATURE_SOC = 29,
PMLOG_GFX_POWER = 30,
PMLOG_GFX_CURRENT = 31,
PMLOG_TEMPERATURE_CPU = 32,
PMLOG_CPU_POWER = 33,
PMLOG_CLK_CPUCLK = 34,
PMLOG_THROTTLER_STATUS = 35,
PMLOG_CLK_VCN1CLK1 = 36,
PMLOG_CLK_VCN1CLK2 = 37,
PMLOG_SMART_POWERSHIFT_CPU = 38,
PMLOG_SMART_POWERSHIFT_DGPU = 39,
PMLOG_BUS_SPEED = 40,
PMLOG_BUS_LANES = 41,
PMLOG_MAX_SENSORS_REAL
} ADLSensorType;
/// Defines the maximum string length
#define ADL_MAX_CHAR 4096
/// Defines the maximum string length
#define ADL_MAX_PATH 256
/// Defines the maximum number of supported adapters
#define ADL_MAX_ADAPTERS 250
/// Defines the maxumum number of supported displays
#define ADL_MAX_DISPLAYS 150
/// Defines the maxumum string length for device name
#define ADL_MAX_DEVICENAME 32
/// Defines for all adapters
#define ADL_ADAPTER_INDEX_ALL -1
/// \defgroup define_adl_results Result Codes
/// This group of definitions are the various results returned by all ADL functions \n
/// @{
/// All OK, but need to wait
#define ADL_OK_WAIT 4
/// All OK, but need restart
#define ADL_OK_RESTART 3
/// All OK but need mode change
#define ADL_OK_MODE_CHANGE 2
/// All OK, but with warning
#define ADL_OK_WARNING 1
/// ADL function completed successfully
#define ADL_OK 0
/// Generic Error. Most likely one or more of the Escape calls to the driver failed!
#define ADL_ERR -1
/// ADL not initialized
#define ADL_ERR_NOT_INIT -2
/// One of the parameter passed is invalid
#define ADL_ERR_INVALID_PARAM -3
/// One of the parameter size is invalid
#define ADL_ERR_INVALID_PARAM_SIZE -4
/// Invalid ADL index passed
#define ADL_ERR_INVALID_ADL_IDX -5
/// Invalid controller index passed
#define ADL_ERR_INVALID_CONTROLLER_IDX -6
/// Invalid display index passed
#define ADL_ERR_INVALID_DIPLAY_IDX -7
/// Function not supported by the driver
#define ADL_ERR_NOT_SUPPORTED -8
/// Defines the maximum string length
#define ADL_MAX_PATH 256
/// Null Pointer error
#define ADL_ERR_NULL_POINTER -9
/// Call can't be made due to disabled adapter
#define ADL_ERR_DISABLED_ADAPTER -10
/// Invalid Callback
#define ADL_ERR_INVALID_CALLBACK -11
/// Display Resource conflict
#define ADL_ERR_RESOURCE_CONFLICT -12
//Failed to update some of the values. Can be returned by set request that include multiple values if not all values were successfully committed.
#define ADL_ERR_SET_INCOMPLETE -20
/// There's no Linux XDisplay in Linux Console environment
#define ADL_ERR_NO_XDISPLAY -21
//values for ADLFanSpeedValue.iSpeedType
#define ADL_DL_FANCTRL_SPEED_TYPE_PERCENT 1
@ -37,9 +135,6 @@
//values for ADLFanSpeedValue.iFlags
#define ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED 1
//Define Performance Metrics Log max sensors number
#define ADL_PMLOG_MAX_SENSORS 256
/**
* Declarations from adl_structures.h
*/
@ -90,6 +185,7 @@ typedef struct AdapterInfo
char strPNPString[ADL_MAX_PATH];
/// It is generated from EnumDisplayDevices.
int iOSDisplayIndex;
#endif /* (_WIN32) || (_WIN64) */
#if defined (LINUX)
@ -181,90 +277,6 @@ typedef struct ADLFanSpeedValue
int iFlags;
} ADLFanSpeedValue;
/////////////////////////////////////////////////////////////////////////////////////////////
///\brief Structure containing information about the display device.
///
/// This structure is used to store display device information
/// such as display index, type, name, connection status, mapped adapter and controller indexes,
/// whether or not multiple VPUs are supported, local display connections or not (through Lasso), etc.
/// This information can be returned to the user. Alternatively, it can be used to access various driver calls to set
/// or fetch various display device related settings upon the user's request.
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct ADLDisplayID
{
/// The logical display index belonging to this adapter.
int iDisplayLogicalIndex;
///\brief The physical display index.
/// For example, display index 2 from adapter 2 can be used by current adapter 1.\n
/// So current adapter may enumerate this adapter as logical display 7 but the physical display
/// index is still 2.
int iDisplayPhysicalIndex;
/// The persistent logical adapter index for the display.
int iDisplayLogicalAdapterIndex;
///\brief The persistent physical adapter index for the display.
/// It can be the current adapter or a non-local adapter. \n
/// If this adapter index is different than the current adapter,
/// the Display Non Local flag is set inside DisplayInfoValue.
int iDisplayPhysicalAdapterIndex;
} ADLDisplayID, *LPADLDisplayID;
/////////////////////////////////////////////////////////////////////////////////////////////
///\brief Structure containing information about the display device.
///
/// This structure is used to store various information about the display device. This
/// information can be returned to the user, or used to access various driver calls to set
/// or fetch various display-device-related settings upon the user's request
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct ADLDisplayInfo
{
/// The DisplayID structure
ADLDisplayID displayID;
///\deprecated The controller index to which the display is mapped.\n Will not be used in the future\n
int iDisplayControllerIndex;
/// The display's EDID name.
char strDisplayName[ADL_MAX_PATH];
/// The display's manufacturer name.
char strDisplayManufacturerName[ADL_MAX_PATH];
/// The Display type. For example: CRT, TV, CV, DFP.
int iDisplayType;
/// The display output type. For example: HDMI, SVIDEO, COMPONMNET VIDEO.
int iDisplayOutputType;
/// The connector type for the device.
int iDisplayConnector;
///\brief The bit mask identifies the number of bits ADLDisplayInfo is currently using. \n
/// It will be the sum all the bit definitions in ADL_DISPLAY_DISPLAYINFO_xxx.
int iDisplayInfoMask;
/// The bit mask identifies the display status. \ref define_displayinfomask
int iDisplayInfoValue;
} ADLDisplayInfo, *LPADLDisplayInfo;
/////////////////////////////////////////////////////////////////////////////////////////////
/// \brief Structure containing information about the BIOS.
///
/// This structure is used to store various information about the Chipset. This
/// information can be returned to the user.
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct ADLBiosInfo
{
char strPartNumber[ADL_MAX_PATH]; ///< Part number.
char strVersion[ADL_MAX_PATH]; ///< Version number.
char strDate[ADL_MAX_PATH]; ///< BIOS date in yyyy/mm/dd hh:mm format.
} ADLBiosInfo, *LPADLBiosInfo;
/////////////////////////////////////////////////////////////////////////////////////////////
///\brief Structure containing information about current power management related activity.
///
@ -346,7 +358,7 @@ typedef struct ADLODParameters
/// This structure is used to store information about Overdrive 6 fan speed information
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLOD6FanSpeedInfo
typedef struct ADLOD6FanSpeedInfo
{
/// Contains a bitmap of the valid fan speed type flags. Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM, \ref ADL_OD6_FANSPEED_USER_DEFINED
int iSpeedType;
@ -368,7 +380,7 @@ typedef struct _ADLOD6FanSpeedInfo
/// This structure is used to store information about Overdrive 6 fan speed value
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLOD6FanSpeedValue
typedef struct ADLOD6FanSpeedValue
{
/// Indicates the units of the fan speed. Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM
int iSpeedType;
@ -388,7 +400,7 @@ typedef struct _ADLOD6FanSpeedValue
/// This structure is used to store information about current Overdrive 6 performance status.
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLOD6CurrentStatus
typedef struct ADLOD6CurrentStatus
{
/// Current engine clock in 10 KHz.
int iEngineClock;
@ -419,7 +431,7 @@ typedef struct _ADLOD6CurrentStatus
/// This structure is used to store information about Overdrive 6 clock range
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLOD6ParameterRange
typedef struct ADLOD6ParameterRange
{
/// The starting value of the clock range
int iMin;
@ -436,7 +448,7 @@ typedef struct _ADLOD6ParameterRange
/// This structure is used to store information about Overdrive 6 capabilities
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLOD6Capabilities
typedef struct ADLOD6Capabilities
{
/// Contains a bitmap of the OD6 capability flags. Possible values: \ref ADL_OD6_CAPABILITY_SCLK_CUSTOMIZATION,
/// \ref ADL_OD6_CAPABILITY_MCLK_CUSTOMIZATION, \ref ADL_OD6_CAPABILITY_GPU_ACTIVITY_MONITOR
@ -487,7 +499,7 @@ typedef struct ADLODPerformanceLevel
/// This structure is used to store information about Overdrive 6 clock values.
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLOD6PerformanceLevel
typedef struct ADLOD6PerformanceLevel
{
/// Engine (core) clock.
int iEngineClock;
@ -504,7 +516,7 @@ typedef struct _ADLOD6PerformanceLevel
/// are contained in the aLevels array.
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLOD6StateInfo
typedef struct ADLOD6StateInfo
{
/// Number of levels. OD6 uses clock ranges instead of discrete performance levels.
/// iNumberOfPerformanceLevels is always 2. The 1st level indicates the minimum clocks
@ -544,63 +556,18 @@ typedef struct ADLODPerformanceLevels
/// This structure is used to store information about Performance Metrics data output
/// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////////
typedef struct _ADLSingleSensorData
typedef struct ADLSingleSensorData
{
int supported;
int value;
} ADLSingleSensorData;
typedef struct _ADLPMLogDataOutput
typedef struct ADLPMLogDataOutput
{
int size;
ADLSingleSensorData sensors[ADL_PMLOG_MAX_SENSORS];
}ADLPMLogDataOutput;
typedef enum _ADLSensorType
{
SENSOR_MAXTYPES = 0,
PMLOG_CLK_GFXCLK = 1,
PMLOG_CLK_MEMCLK = 2,
PMLOG_CLK_SOCCLK = 3,
PMLOG_CLK_UVDCLK1 = 4,
PMLOG_CLK_UVDCLK2 = 5,
PMLOG_CLK_VCECLK = 6,
PMLOG_CLK_VCNCLK = 7,
PMLOG_TEMPERATURE_EDGE = 8,
PMLOG_TEMPERATURE_MEM = 9,
PMLOG_TEMPERATURE_VRVDDC = 10,
PMLOG_TEMPERATURE_VRMVDD = 11,
PMLOG_TEMPERATURE_LIQUID = 12,
PMLOG_TEMPERATURE_PLX = 13,
PMLOG_FAN_RPM = 14,
PMLOG_FAN_PERCENTAGE = 15,
PMLOG_SOC_VOLTAGE = 16,
PMLOG_SOC_POWER = 17,
PMLOG_SOC_CURRENT = 18,
PMLOG_INFO_ACTIVITY_GFX = 19,
PMLOG_INFO_ACTIVITY_MEM = 20,
PMLOG_GFX_VOLTAGE = 21,
PMLOG_MEM_VOLTAGE = 22,
PMLOG_ASIC_POWER = 23,
PMLOG_TEMPERATURE_VRSOC = 24,
PMLOG_TEMPERATURE_VRMVDD0 = 25,
PMLOG_TEMPERATURE_VRMVDD1 = 26,
PMLOG_TEMPERATURE_HOTSPOT = 27,
PMLOG_TEMPERATURE_GFX = 28,
PMLOG_TEMPERATURE_SOC = 29,
PMLOG_GFX_POWER = 30,
PMLOG_GFX_CURRENT = 31,
PMLOG_TEMPERATURE_CPU = 32,
PMLOG_CPU_POWER = 33,
PMLOG_CLK_CPUCLK = 34,
PMLOG_THROTTLER_STATUS = 35,
PMLOG_CLK_VCN1CLK1 = 36,
PMLOG_CLK_VCN1CLK2 = 37,
PMLOG_SMART_POWERSHIFT_CPU = 38,
PMLOG_SMART_POWERSHIFT_DGPU = 39,
PMLOG_MAX_SENSORS_REAL
} ADLSensorType;
/// \brief Handle to ADL client context.
///
/// ADL clients obtain context handle from initial call to \ref ADL2_Main_Control_Create.
@ -628,7 +595,6 @@ typedef int HM_ADAPTER_ADL;
typedef int (ADL_API_CALL *ADL_ADAPTER_ACTIVE_GET ) ( int, int* );
typedef int (ADL_API_CALL *ADL_ADAPTER_ADAPTERINFO_GET ) ( LPAdapterInfo, int );
typedef int (ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
typedef int (ADL_API_CALL *ADL_DISPLAY_DISPLAYINFO_GET ) ( int, int *, ADLDisplayInfo **, int );
typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )();
typedef int (ADL_API_CALL *ADL_OVERDRIVE5_CURRENTACTIVITY_GET ) (int iAdapterIndex, ADLPMActivity *lpActivity);
@ -654,7 +620,6 @@ typedef struct hm_adl_lib
ADL_ADAPTER_ACTIVE_GET ADL_Adapter_Active_Get;
ADL_ADAPTER_ADAPTERINFO_GET ADL_Adapter_AdapterInfo_Get;
ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
ADL_DISPLAY_DISPLAYINFO_GET ADL_Display_DisplayInfo_Get;
ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
ADL_OVERDRIVE5_CURRENTACTIVITY_GET ADL_Overdrive5_CurrentActivity_Get;

@ -46,6 +46,7 @@ typedef cl_context (CL_API_CALL *OCL_CLCREATECONTEXT) (const cl_
typedef cl_kernel (CL_API_CALL *OCL_CLCREATEKERNEL) (cl_program, const char *, cl_int *);
typedef cl_program (CL_API_CALL *OCL_CLCREATEPROGRAMWITHBINARY) (cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
typedef cl_program (CL_API_CALL *OCL_CLCREATEPROGRAMWITHSOURCE) (cl_context, cl_uint, const char **, const size_t *, cl_int *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUEFILLBUFFER) (cl_command_queue, cl_mem, const void *, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUECOPYBUFFER) (cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *);
typedef void * (CL_API_CALL *OCL_CLENQUEUEMAPBUFFER) (cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event *, cl_event *, cl_int *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUENDRANGEKERNEL) (cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
@ -87,6 +88,7 @@ typedef struct hc_opencl_lib
OCL_CLCREATEPROGRAMWITHBINARY clCreateProgramWithBinary;
OCL_CLCREATEPROGRAMWITHSOURCE clCreateProgramWithSource;
OCL_CLENQUEUECOPYBUFFER clEnqueueCopyBuffer;
OCL_CLENQUEUEFILLBUFFER clEnqueueFillBuffer;
OCL_CLENQUEUEMAPBUFFER clEnqueueMapBuffer;
OCL_CLENQUEUENDRANGEKERNEL clEnqueueNDRangeKernel;
OCL_CLENQUEUEREADBUFFER clEnqueueReadBuffer;

@ -1028,17 +1028,14 @@ typedef CUresult (CUDA_API_CALL *CUDA_CUINIT) (unsigned int);
typedef CUresult (CUDA_API_CALL *CUDA_CULAUNCHKERNEL) (CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOC) (CUdeviceptr *, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOCHOST) (void **, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOD) (CUdeviceptr, CUdeviceptr, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTODASYNC) (CUdeviceptr, CUdeviceptr, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOH) (void *, CUdeviceptr, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOHASYNC) (void *, CUdeviceptr, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTOD) (CUdeviceptr, const void *, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTODASYNC) (CUdeviceptr, const void *, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREE) (CUdeviceptr);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREEHOST) (void *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMGETINFO) (size_t *, size_t *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32) (CUdeviceptr, unsigned int, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8) (CUdeviceptr, unsigned char, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32ASYNC) (CUdeviceptr, unsigned int, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8ASYNC) (CUdeviceptr, unsigned char, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETFUNCTION) (CUfunction *, CUmodule, const char *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETGLOBAL) (CUdeviceptr *, size_t *, CUmodule, const char *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOAD) (CUmodule *, const char *);
@ -1093,17 +1090,14 @@ typedef struct hc_cuda_lib
CUDA_CULAUNCHKERNEL cuLaunchKernel;
CUDA_CUMEMALLOC cuMemAlloc;
CUDA_CUMEMALLOCHOST cuMemAllocHost;
CUDA_CUMEMCPYDTOD cuMemcpyDtoD;
CUDA_CUMEMCPYDTODASYNC cuMemcpyDtoDAsync;
CUDA_CUMEMCPYDTOH cuMemcpyDtoH;
CUDA_CUMEMCPYDTOHASYNC cuMemcpyDtoHAsync;
CUDA_CUMEMCPYHTOD cuMemcpyHtoD;
CUDA_CUMEMCPYHTODASYNC cuMemcpyHtoDAsync;
CUDA_CUMEMFREE cuMemFree;
CUDA_CUMEMFREEHOST cuMemFreeHost;
CUDA_CUMEMGETINFO cuMemGetInfo;
CUDA_CUMEMSETD32 cuMemsetD32;
CUDA_CUMEMSETD8 cuMemsetD8;
CUDA_CUMEMSETD32ASYNC cuMemsetD32Async;
CUDA_CUMEMSETD8ASYNC cuMemsetD8Async;
CUDA_CUMODULEGETFUNCTION cuModuleGetFunction;
CUDA_CUMODULEGETGLOBAL cuModuleGetGlobal;
CUDA_CUMODULELOAD cuModuleLoad;

File diff suppressed because it is too large Load Diff

@ -6,41 +6,26 @@
#ifndef _EXT_HIPRTC_H
#define _EXT_HIPRTC_H
/**
* from hip_runtime.h (/opt/rocm/hip/include/hip/amd_detail/hiprtc.h)
*/
// start: amd_detail/hiprtc.h
/**
* \ingroup error
* \brief The enumerated type hiprtcResult defines API call result codes.
* HIPRTC API functions return hiprtcResult to indicate the call
* result.
*/
typedef enum {
HIPRTC_SUCCESS = 0,
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
HIPRTC_ERROR_INVALID_INPUT = 3,
HIPRTC_ERROR_INVALID_PROGRAM = 4,
HIPRTC_ERROR_INVALID_OPTION = 5,
HIPRTC_ERROR_COMPILATION = 6,
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
HIPRTC_ERROR_INTERNAL_ERROR = 11
typedef enum hiprtcResult {
HIPRTC_SUCCESS = 0,
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
HIPRTC_ERROR_INVALID_INPUT = 3,
HIPRTC_ERROR_INVALID_PROGRAM = 4,
HIPRTC_ERROR_INVALID_OPTION = 5,
HIPRTC_ERROR_COMPILATION = 6,
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
HIPRTC_ERROR_INTERNAL_ERROR = 11
} hiprtcResult;
/**
* \ingroup compilation
* \brief hiprtcProgram is the unit of compilation, and an opaque handle for
* a program.
*
* To compile a CUDA program string, an instance of hiprtcProgram must be
* created first with ::hiprtcCreateProgram, then compiled with
* ::hiprtcCompileProgram.
*/
typedef struct _hiprtcProgram *hiprtcProgram;
typedef struct _hiprtcProgram* hiprtcProgram;
// stop: amd_detail/hiprtc.h
#ifdef _WIN32
#define HIPRTCAPI __stdcall
@ -54,13 +39,12 @@ typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCADDNAMEEXPRESSION) (hiprtc
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCOMPILEPROGRAM) (hiprtcProgram, int, const char * const *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCREATEPROGRAM) (hiprtcProgram *, const char *, const char *, int, const char * const *, const char * const *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCDESTROYPROGRAM) (hiprtcProgram *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETCODE) (hiprtcProgram, char *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETCODESIZE) (hiprtcProgram, size_t *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETLOWEREDNAME) (hiprtcProgram, const char * const, const char **);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTX) (hiprtcProgram, char *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTXSIZE) (hiprtcProgram, size_t *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOG) (hiprtcProgram, char *);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOGSIZE) (hiprtcProgram, size_t *);
typedef const char * (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING) (hiprtcResult);
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCVERSION) (int *, int *);
typedef const char * (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING) (hiprtcResult);
typedef struct hc_hiprtc_lib
{
@ -70,13 +54,12 @@ typedef struct hc_hiprtc_lib
HIPRTC_HIPRTCCOMPILEPROGRAM hiprtcCompileProgram;
HIPRTC_HIPRTCCREATEPROGRAM hiprtcCreateProgram;
HIPRTC_HIPRTCDESTROYPROGRAM hiprtcDestroyProgram;
HIPRTC_HIPRTCGETCODE hiprtcGetCode;
HIPRTC_HIPRTCGETCODESIZE hiprtcGetCodeSize;
HIPRTC_HIPRTCGETLOWEREDNAME hiprtcGetLoweredName;
HIPRTC_HIPRTCGETPTX hiprtcGetCode;
HIPRTC_HIPRTCGETPTXSIZE hiprtcGetCodeSize;
HIPRTC_HIPRTCGETPROGRAMLOG hiprtcGetProgramLog;
HIPRTC_HIPRTCGETPROGRAMLOGSIZE hiprtcGetProgramLogSize;
HIPRTC_HIPRTCGETERRORSTRING hiprtcGetErrorString;
HIPRTC_HIPRTCVERSION hiprtcVersion;
} hc_hiprtc_lib_t;

@ -15,7 +15,7 @@ int hash_encode (const hashconfig_t *hashconfig, const hashes_t *hashes, const m
int save_hash (hashcat_ctx_t *hashcat_ctx);
void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, plain_t *plain);
int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, plain_t *plain);
//int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 salt_pos);
int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param);

@ -13,7 +13,7 @@
#include <limits.h>
#include <inttypes.h>
static const int MODULE_INTERFACE_VERSION_MINIMUM = 520;
static const int MODULE_INTERFACE_VERSION_MINIMUM = 630;
static const int MODULE_HASH_MODES_MAXIMUM = 100000;

@ -18,6 +18,7 @@ u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *ha
u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
u32 module_forced_outfile_format (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);
const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra);

@ -11,9 +11,13 @@
#define TUNING_DB_FILE "hashcat.hctune"
int sort_by_tuning_db_alias (const void *v1, const void *v2);
int sort_by_tuning_db_entry (const void *v1, const void *v2);
int tuning_db_init (hashcat_ctx_t *hashcat_ctx);
void tuning_db_destroy (hashcat_ctx_t *hashcat_ctx);
bool tuning_db_process_line (hashcat_ctx_t *hashcat_ctx, const char *line_buf, const int line_num);
tuning_db_entry_t *tuning_db_search (hashcat_ctx_t *hashcat_ctx, const char *device_name, const cl_device_type device_type, int attack_mode, const int hash_mode);
#endif // _TUNINGDB_H

@ -295,6 +295,7 @@ typedef enum rule_functions
RULE_OP_MANGLE_UREST_LFIRST = 'C',
RULE_OP_MANGLE_TREST = 't',
RULE_OP_MANGLE_TOGGLE_AT = 'T',
RULE_OP_MANGLE_TOGGLE_AT_SEP = '3',
RULE_OP_MANGLE_REVERSE = 'r',
RULE_OP_MANGLE_DUPEWORD = 'd',
RULE_OP_MANGLE_DUPEWORD_TIMES = 'p',
@ -463,6 +464,7 @@ typedef enum dgst_size
DGST_SIZE_4_32 = (32 * sizeof (u32)), // 128 !!!
DGST_SIZE_4_64 = (64 * sizeof (u32)), // 256
DGST_SIZE_8_2 = (2 * sizeof (u64)), // 16 !!!
DGST_SIZE_8_4 = (4 * sizeof (u64)), // 32 !!!
DGST_SIZE_8_6 = (6 * sizeof (u64)), // 48 !!!
DGST_SIZE_8_8 = (8 * sizeof (u64)), // 64 !!!
DGST_SIZE_8_16 = (16 * sizeof (u64)), // 128 !!!
@ -927,7 +929,6 @@ typedef struct hashes
void *digests_buf;
u32 *digests_shown;
u32 *digests_shown_tmp;
u32 salts_cnt;
u32 salts_done;
@ -1428,6 +1429,7 @@ typedef struct hc_device_param
CUevent cuda_event1;
CUevent cuda_event2;
CUevent cuda_event3;
CUmodule cuda_module;
CUmodule cuda_module_shared;
@ -1503,80 +1505,81 @@ typedef struct hc_device_param
int hip_warp_size;
HIPdevice hip_device;
HIPcontext hip_context;
HIPstream hip_stream;
HIPevent hip_event1;
HIPevent hip_event2;
HIPmodule hip_module;
HIPmodule hip_module_shared;
HIPmodule hip_module_mp;
HIPmodule hip_module_amp;
HIPfunction hip_function1;
HIPfunction hip_function12;
HIPfunction hip_function2p;
HIPfunction hip_function2;
HIPfunction hip_function2e;
HIPfunction hip_function23;
HIPfunction hip_function3;
HIPfunction hip_function4;
HIPfunction hip_function_init2;
HIPfunction hip_function_loop2p;
HIPfunction hip_function_loop2;
HIPfunction hip_function_mp;
HIPfunction hip_function_mp_l;
HIPfunction hip_function_mp_r;
HIPfunction hip_function_amp;
HIPfunction hip_function_tm;
HIPfunction hip_function_memset;
HIPfunction hip_function_bzero;
HIPfunction hip_function_atinit;
HIPfunction hip_function_utf8toutf16le;
HIPfunction hip_function_decompress;
HIPfunction hip_function_aux1;
HIPfunction hip_function_aux2;
HIPfunction hip_function_aux3;
HIPfunction hip_function_aux4;
HIPdeviceptr hip_d_pws_buf;
HIPdeviceptr hip_d_pws_amp_buf;
HIPdeviceptr hip_d_pws_comp_buf;
HIPdeviceptr hip_d_pws_idx;
HIPdeviceptr hip_d_rules;
HIPdeviceptr hip_d_rules_c;
HIPdeviceptr hip_d_combs;
HIPdeviceptr hip_d_combs_c;
HIPdeviceptr hip_d_bfs;
HIPdeviceptr hip_d_bfs_c;
HIPdeviceptr hip_d_tm_c;
HIPdeviceptr hip_d_bitmap_s1_a;
HIPdeviceptr hip_d_bitmap_s1_b;
HIPdeviceptr hip_d_bitmap_s1_c;
HIPdeviceptr hip_d_bitmap_s1_d;
HIPdeviceptr hip_d_bitmap_s2_a;
HIPdeviceptr hip_d_bitmap_s2_b;
HIPdeviceptr hip_d_bitmap_s2_c;
HIPdeviceptr hip_d_bitmap_s2_d;
HIPdeviceptr hip_d_plain_bufs;
HIPdeviceptr hip_d_digests_buf;
HIPdeviceptr hip_d_digests_shown;
HIPdeviceptr hip_d_salt_bufs;
HIPdeviceptr hip_d_esalt_bufs;
HIPdeviceptr hip_d_tmps;
HIPdeviceptr hip_d_hooks;
HIPdeviceptr hip_d_result;
HIPdeviceptr hip_d_extra0_buf;
HIPdeviceptr hip_d_extra1_buf;
HIPdeviceptr hip_d_extra2_buf;
HIPdeviceptr hip_d_extra3_buf;
HIPdeviceptr hip_d_root_css_buf;
HIPdeviceptr hip_d_markov_css_buf;
HIPdeviceptr hip_d_st_digests_buf;
HIPdeviceptr hip_d_st_salts_buf;
HIPdeviceptr hip_d_st_esalts_buf;
hipDevice_t hip_device;
hipCtx_t hip_context;
hipStream_t hip_stream;
hipEvent_t hip_event1;
hipEvent_t hip_event2;
hipEvent_t hip_event3;
hipModule_t hip_module;
hipModule_t hip_module_shared;
hipModule_t hip_module_mp;
hipModule_t hip_module_amp;
hipFunction_t hip_function1;
hipFunction_t hip_function12;
hipFunction_t hip_function2p;
hipFunction_t hip_function2;
hipFunction_t hip_function2e;
hipFunction_t hip_function23;
hipFunction_t hip_function3;
hipFunction_t hip_function4;
hipFunction_t hip_function_init2;
hipFunction_t hip_function_loop2p;
hipFunction_t hip_function_loop2;
hipFunction_t hip_function_mp;
hipFunction_t hip_function_mp_l;
hipFunction_t hip_function_mp_r;
hipFunction_t hip_function_amp;
hipFunction_t hip_function_tm;
hipFunction_t hip_function_memset;
hipFunction_t hip_function_bzero;
hipFunction_t hip_function_atinit;
hipFunction_t hip_function_utf8toutf16le;
hipFunction_t hip_function_decompress;
hipFunction_t hip_function_aux1;
hipFunction_t hip_function_aux2;
hipFunction_t hip_function_aux3;
hipFunction_t hip_function_aux4;
hipDeviceptr_t hip_d_pws_buf;
hipDeviceptr_t hip_d_pws_amp_buf;
hipDeviceptr_t hip_d_pws_comp_buf;
hipDeviceptr_t hip_d_pws_idx;
hipDeviceptr_t hip_d_rules;
hipDeviceptr_t hip_d_rules_c;
hipDeviceptr_t hip_d_combs;
hipDeviceptr_t hip_d_combs_c;
hipDeviceptr_t hip_d_bfs;
hipDeviceptr_t hip_d_bfs_c;
hipDeviceptr_t hip_d_tm_c;
hipDeviceptr_t hip_d_bitmap_s1_a;
hipDeviceptr_t hip_d_bitmap_s1_b;
hipDeviceptr_t hip_d_bitmap_s1_c;
hipDeviceptr_t hip_d_bitmap_s1_d;
hipDeviceptr_t hip_d_bitmap_s2_a;
hipDeviceptr_t hip_d_bitmap_s2_b;
hipDeviceptr_t hip_d_bitmap_s2_c;
hipDeviceptr_t hip_d_bitmap_s2_d;
hipDeviceptr_t hip_d_plain_bufs;
hipDeviceptr_t hip_d_digests_buf;
hipDeviceptr_t hip_d_digests_shown;
hipDeviceptr_t hip_d_salt_bufs;
hipDeviceptr_t hip_d_esalt_bufs;
hipDeviceptr_t hip_d_tmps;
hipDeviceptr_t hip_d_hooks;
hipDeviceptr_t hip_d_result;
hipDeviceptr_t hip_d_extra0_buf;
hipDeviceptr_t hip_d_extra1_buf;
hipDeviceptr_t hip_d_extra2_buf;
hipDeviceptr_t hip_d_extra3_buf;
hipDeviceptr_t hip_d_root_css_buf;
hipDeviceptr_t hip_d_markov_css_buf;
hipDeviceptr_t hip_d_st_digests_buf;
hipDeviceptr_t hip_d_st_salts_buf;
hipDeviceptr_t hip_d_st_esalts_buf;
// API: opencl
@ -1727,8 +1730,8 @@ typedef struct backend_ctx
int rc_hip_init;
int rc_hiprtc_init;
int hiprtc_driver_version;
int hip_driver_version;
int hip_runtimeVersion;
int hip_driverVersion;
// opencl
@ -2044,9 +2047,11 @@ typedef struct tuning_db
tuning_db_alias_t *alias_buf;
int alias_cnt;
int alias_alloc;
tuning_db_entry_t *entry_buf;
int entry_cnt;
int entry_alloc;
} tuning_db_t;
@ -2650,6 +2655,7 @@ typedef struct module_ctx
u32 (*module_dgst_pos3) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
u32 (*module_dgst_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
u64 (*module_esalt_size) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
const char *(*module_extra_tuningdb_block) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
u32 (*module_forced_outfile_format) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
u32 (*module_hash_category) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);
const char *(*module_hash_name) (const hashconfig_t *, const user_options_t *, const user_options_extra_t *);

@ -53,7 +53,7 @@ endif
## Do not modify
##
MODULE_INTERFACE_VERSION := 620
MODULE_INTERFACE_VERSION := 630
##
## Native compiler paths
@ -203,12 +203,11 @@ endif
## because UNRAR
ifeq ($(ENABLE_UNRAR),1)
ifeq ($(USE_SYSTEM_UNRAR),0)
ifneq ($(UNAME),Darwin)
CFLAGS_UNRAR += -Wno-misleading-indentation
ifneq ($(CC),clang)
CFLAGS_UNRAR += -Wno-class-memaccess
else
CFLAGS_UNRAR += -Wno-missing-braces
CFLAGS_UNRAR += -Wno-misleading-indentation
endif
CFLAGS_UNRAR += -Wno-missing-braces
CFLAGS_UNRAR += -Wno-unused-variable
CFLAGS_UNRAR += -Wno-unused-parameter
CFLAGS_UNRAR += -Wno-unused-function

@ -10,7 +10,7 @@
#include "status.h"
#include "autotune.h"
static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops, const u32 kernel_threads)
{
hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
user_options_t *user_options = hashcat_ctx->user_options;
@ -19,7 +19,9 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set
device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
u32 kernel_power_try = device_param->hardware_power * kernel_accel;
const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads;
u32 kernel_power_try = hardware_power * kernel_accel;
if (user_options->attack_mode == ATTACK_MODE_ASSOCIATION)
{
@ -33,6 +35,10 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
}
}
const u32 kernel_threads_sav = device_param->kernel_threads;
device_param->kernel_threads = kernel_threads;
const double spin_damp_sav = device_param->spin_damp;
device_param->spin_damp = 0;
@ -50,71 +56,51 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
}
else
{
run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_try, true, 0);
if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, 0, kernel_power_try, true, 0);
}
run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 0, kernel_power_try, true, 0);
}
device_param->spin_damp = spin_damp_sav;
device_param->kernel_threads = kernel_threads_sav;
const double exec_msec_prev = get_avg_exec_time (device_param, 1);
return exec_msec_prev;
}
/*
static double try_run_preferred (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
static double try_run_times (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops, const u32 kernel_threads, const int times)
{
hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
device_param->kernel_params_buf32[28] = 0;
device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set
device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
const u32 kernel_power_try = device_param->hardware_power * kernel_accel;
const u32 kernel_threads_sav = device_param->kernel_threads;
double exec_msec_best = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
const double spin_damp_sav = device_param->spin_damp;
device_param->spin_damp = 0;
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
for (int i = 1; i < times; i++)
{
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
{
device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple1;
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_try, true, 0);
}
else
{
device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple4;
if (exec_msec > exec_msec_best) continue;
run_kernel (hashcat_ctx, device_param, KERN_RUN_4, 0, kernel_power_try, true, 0);
}
exec_msec_best = exec_msec;
}
else
{
device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple2;
run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 0, kernel_power_try, true, 0);
}
return exec_msec_best;
}
device_param->kernel_threads = kernel_threads_sav;
static u32 previous_power_of_two (const u32 x)
{
// https://stackoverflow.com/questions/2679815/previous-power-of-2
// really cool!
device_param->spin_damp = spin_damp_sav;
if (x == 0) return 0;
const double exec_msec_prev = get_avg_exec_time (device_param, 1);
u32 r = x;
return exec_msec_prev;
r |= (r >> 1);
r |= (r >> 2);
r |= (r >> 4);
r |= (r >> 8);
r |= (r >> 16);
return r - (r >> 1);
}
*/
static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
{
@ -131,9 +117,57 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
const u32 kernel_loops_min = device_param->kernel_loops_min;
const u32 kernel_loops_max = device_param->kernel_loops_max;
const u32 kernel_threads_min = device_param->kernel_threads_min;
const u32 kernel_threads_max = device_param->kernel_threads_max;
u32 kernel_accel = kernel_accel_min;
u32 kernel_loops = kernel_loops_min;
// for the threads we take as initial value what we receive from the runtime
// but is only to start with something, we will fine tune this value as soon as we have our workload specified
// this thread limiting is also performed insinde run_kernel() so we need to redo it here, too
u32 kernel_wgs = 0;
u32 kernel_wgs_multiple = 0;
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
{
kernel_wgs = device_param->kernel_wgs1;
kernel_wgs_multiple = device_param->kernel_preferred_wgs_multiple1;
}
else
{
kernel_wgs = device_param->kernel_wgs4;
kernel_wgs_multiple = device_param->kernel_preferred_wgs_multiple4;
}
}
else
{
kernel_wgs = device_param->kernel_wgs2;
kernel_wgs_multiple = device_param->kernel_preferred_wgs_multiple2;
}
u32 kernel_threads = kernel_threads_max;
if ((kernel_wgs >= kernel_threads_min) && (kernel_wgs <= kernel_threads_max))
{
kernel_threads = kernel_wgs;
}
// having a value power of 2 makes it easier to divide
const u32 kernel_threads_p2 = previous_power_of_two (kernel_threads);
if ((kernel_threads_p2 >= kernel_threads_min) && (kernel_threads_p2 <= kernel_threads_max))
{
kernel_threads = kernel_threads_p2;
}
// in this case the user specified a fixed -n and -u on the commandline
// no way to tune anything
// but we need to run a few caching rounds
@ -149,10 +183,10 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (hashconfig->warmup_disable == false)
{
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
}
#endif
@ -164,29 +198,19 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
const u32 kernel_power_max = device_param->hardware_power * kernel_accel_max;
int CU_rc;
int HIP_rc;
int CL_rc;
if (device_param->is_cuda == true)
{
CU_rc = run_cuda_kernel_atinit (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, kernel_power_max);
if (CU_rc == -1) return -1;
if (run_cuda_kernel_atinit (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, kernel_power_max) == -1) return -1;
}
if (device_param->is_hip == true)
{
HIP_rc = run_hip_kernel_atinit (hashcat_ctx, device_param, device_param->hip_d_pws_buf, kernel_power_max);
if (HIP_rc == -1) return -1;
if (run_hip_kernel_atinit (hashcat_ctx, device_param, device_param->hip_d_pws_buf, kernel_power_max) == -1) return -1;
}
if (device_param->is_opencl == true)
{
CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
if (CL_rc == -1) return -1;
if (run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max) == -1) return -1;
}
if (user_options->slow_candidates == true)
@ -200,35 +224,53 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
{
if (device_param->is_cuda == true)
{
CU_rc = hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t));
if (CU_rc == -1) return -1;
if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->cuda_stream) == -1) return -1;
}
if (device_param->is_hip == true)
{
HIP_rc = hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t));
if (HIP_rc == -1) return -1;
if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1;
}
if (device_param->is_opencl == true)
{
CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
if (CL_rc == -1) return -1;
if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) return -1;
}
}
}
}
// we also need to initialize some values using kernels
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
// nothing to do
}
else
{
const u32 kernel_threads_sav = device_param->kernel_threads;
device_param->kernel_threads = device_param->kernel_wgs1;
run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_max, false, 0);
if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
device_param->kernel_threads = device_param->kernel_wgs2p;
run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, 0, kernel_power_max, false, 0);
}
device_param->kernel_threads = kernel_threads_sav;
}
// Do a pre-autotune test run to find out if kernel runtime is above some TDR limit
u32 kernel_loops_max_reduced = kernel_loops_max;
if (true)
{
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min, kernel_threads);
if (exec_msec > 2000)
{
@ -237,7 +279,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
return -1;
}
exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min, kernel_threads);
const u32 mm = kernel_loops_max / kernel_loops_min;
@ -257,16 +299,16 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
{
if (kernel_loops > kernel_loops_max_reduced) continue;
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops);
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops, kernel_threads, 1);
if (exec_msec < target_msec) break;
}
}
// now the same for kernel-accel but with the new kernel-loops from previous loop set
#define STEPS_CNT 16
// now the same for kernel-accel but with the new kernel-loops from previous loop set
if (kernel_accel_min < kernel_accel_max)
{
for (int i = 0; i < STEPS_CNT; i++)
@ -276,7 +318,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (kernel_accel_try < kernel_accel_min) continue;
if (kernel_accel_try > kernel_accel_max) break;
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops);
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_try, kernel_loops, kernel_threads, 1);
if (exec_msec > target_msec) break;
@ -292,7 +334,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
const u32 kernel_accel_orig = kernel_accel;
const u32 kernel_loops_orig = kernel_loops;
double exec_msec_prev = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
double exec_msec_prev = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 1);
for (int i = 1; i < STEPS_CNT; i++)
{
@ -307,7 +349,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
// do a real test
const double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try);
const double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try, kernel_threads, 1);
if (exec_msec_prev < exec_msec) break;
@ -324,7 +366,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
}
}
double exec_msec_pre_final = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
double exec_msec_pre_final = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 1);
const u32 exec_left = (const u32) (target_msec / exec_msec_pre_final);
@ -339,46 +381,43 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
kernel_accel *= exec_accel_min;
}
// start finding best thread count is easier.
// it's either the preferred or the maximum thread count
/*
const u32 kernel_threads_min = device_param->kernel_threads_min;
const u32 kernel_threads_max = device_param->kernel_threads_max;
// v6.2.4 new section: find thread count
// This is not as effective as it could be because of inaccurate kernel return timers
// But is better than fixed values
// Timers in this section are critical, so we rerun meassurements 3 times
if (kernel_threads_min < kernel_threads_max)
if (kernel_threads_max > kernel_threads_min)
{
const double exec_msec_max = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
const u32 kernel_accel_orig = kernel_accel;
const u32 kernel_threads_orig = kernel_threads;
u32 preferred_threads = 0;
double exec_msec_prev = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 3);
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
{
preferred_threads = device_param->kernel_preferred_wgs_multiple1;
}
else
{
preferred_threads = device_param->kernel_preferred_wgs_multiple4;
}
}
else
for (int i = 1; i < STEPS_CNT; i++)
{
preferred_threads = device_param->kernel_preferred_wgs_multiple2;
}
const u32 kernel_accel_try = kernel_accel_orig * (1U << i);
const u32 kernel_threads_try = kernel_threads_orig / (1U << i);
if ((preferred_threads >= kernel_threads_min) && (preferred_threads <= kernel_threads_max))
{
const double exec_msec_preferred = try_run_preferred (hashcat_ctx, device_param, kernel_accel, kernel_loops);
// since we do not modify total amount of workitems, we can (and need) to do increase kernel_accel_max
if (exec_msec_preferred < exec_msec_max)
{
device_param->kernel_threads = preferred_threads;
}
const u32 kernel_accel_max_try = kernel_accel_max * (1U << i);
if (kernel_accel_try > kernel_accel_max_try) break;
if (kernel_threads_try < kernel_threads_min) break;
if (kernel_threads_try % kernel_wgs_multiple) break; // this would just be waste of time
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_try, kernel_loops, kernel_threads_try, 3);
if (exec_msec > exec_msec_prev) continue;
exec_msec_prev = exec_msec;
kernel_accel = kernel_accel_try;
kernel_threads = kernel_threads_try;
}
}
*/
}
// reset them fake words
@ -386,77 +425,43 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (device_param->is_cuda == true)
{
int CU_rc;
CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, device_param->size_pws);
if (CU_rc == -1) return -1;
CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, device_param->size_plains);
if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, device_param->size_pws) == -1) return -1;
if (CU_rc == -1) return -1;
if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_plain_bufs, device_param->size_plains) == -1) return -1;
CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown);
if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown, device_param->size_shown) == -1) return -1;
if (CU_rc == -1) return -1;
if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results) == -1) return -1;
CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results);
if (CU_rc == -1) return -1;
CU_rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tmps, device_param->size_tmps);
if (CU_rc == -1) return -1;
if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tmps, device_param->size_tmps) == -1) return -1;
}
if (device_param->is_hip == true)
{
int HIP_rc;
HIP_rc = run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_buf, device_param->size_pws);
if (HIP_rc == -1) return -1;
HIP_rc = run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains);
if (HIP_rc == -1) return -1;
HIP_rc = run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown);
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_buf, device_param->size_pws) == -1) return -1;
if (HIP_rc == -1) return -1;
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains) == -1) return -1;
HIP_rc = run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results);
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1;
if (HIP_rc == -1) return -1;
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1;
HIP_rc = run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps);
if (HIP_rc == -1) return -1;
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1;
}
if (device_param->is_opencl == true)
{
int CL_rc;
CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, device_param->size_pws);
if (CL_rc == -1) return -1;
CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, device_param->size_plains);
if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, device_param->size_pws) == -1) return -1;
if (CL_rc == -1) return -1;
if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_plain_bufs, device_param->size_plains) == -1) return -1;
CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown);
if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, device_param->size_shown) == -1) return -1;
if (CL_rc == -1) return -1;
if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result, device_param->size_results) == -1) return -1;
CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result, device_param->size_results);
if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps, device_param->size_tmps) == -1) return -1;
if (CL_rc == -1) return -1;
CL_rc = run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps, device_param->size_tmps);
if (CL_rc == -1) return -1;
if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
}
// reset timer
@ -478,8 +483,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
// store
device_param->kernel_accel = kernel_accel;
device_param->kernel_loops = kernel_loops;
device_param->kernel_accel = kernel_accel;
device_param->kernel_loops = kernel_loops;
device_param->kernel_threads = kernel_threads;
const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads;
device_param->hardware_power = hardware_power;
const u32 kernel_power = device_param->hardware_power * device_param->kernel_accel;

File diff suppressed because it is too large Load Diff

@ -53,7 +53,6 @@ int adl_init (void *hashcat_ctx)
HC_LOAD_FUNC(adl, ADL_Adapter_Active_Get, ADL_ADAPTER_ACTIVE_GET, ADL, 0);
HC_LOAD_FUNC(adl, ADL_Adapter_AdapterInfo_Get, ADL_ADAPTER_ADAPTERINFO_GET, ADL, 0);
HC_LOAD_FUNC(adl, ADL_Adapter_NumberOfAdapters_Get, ADL_ADAPTER_NUMBEROFADAPTERS_GET, ADL, 0);
HC_LOAD_FUNC(adl, ADL_Display_DisplayInfo_Get, ADL_DISPLAY_DISPLAYINFO_GET, ADL, 0);
HC_LOAD_FUNC(adl, ADL_Main_Control_Create, ADL_MAIN_CONTROL_CREATE, ADL, 0);
HC_LOAD_FUNC(adl, ADL_Main_Control_Destroy, ADL_MAIN_CONTROL_DESTROY, ADL, 0);
HC_LOAD_FUNC(adl, ADL_Overdrive5_CurrentActivity_Get, ADL_OVERDRIVE5_CURRENTACTIVITY_GET, ADL, 0);

@ -300,7 +300,7 @@ int save_hash (hashcat_ctx_t *hashcat_ctx)
return 0;
}
void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, plain_t *plain)
int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, plain_t *plain)
{
const debugfile_ctx_t *debugfile_ctx = hashcat_ctx->debugfile_ctx;
const hashes_t *hashes = hashcat_ctx->hashes;
@ -313,23 +313,63 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
void *tmps = NULL;
cl_event opencl_event;
int rc = -1;
if (hashconfig->opts_type & OPTS_TYPE_COPY_TMPS)
{
tmps = hcmalloc (hashconfig->tmp_size);
if (device_param->is_cuda == true)
{
hc_cuMemcpyDtoH (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
rc = hc_cuMemcpyDtoHAsync (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size, device_param->cuda_stream);
if (rc == 0)
{
rc = hc_cuEventRecord (hashcat_ctx, device_param->cuda_event3, device_param->cuda_stream);
}
if (rc == -1)
{
hcfree (tmps);
return -1;
}
}
if (device_param->is_hip == true)
{
hc_hipMemcpyDtoH (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
rc = hc_hipMemcpyDtoHAsync (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size, device_param->hip_stream);
if (rc == 0)
{
rc = hc_hipEventRecord (hashcat_ctx, device_param->hip_event3, device_param->hip_stream);
}
if (rc == -1)
{
hcfree (tmps);
return -1;
}
}
if (device_param->is_opencl == true)
{
hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_FALSE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, &opencl_event);
if (rc == 0)
{
rc = hc_clFlush (hashcat_ctx, device_param->opencl_command_queue);
}
if (rc == -1)
{
hcfree (tmps);
return -1;
}
}
}
@ -337,15 +377,14 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
u8 *out_buf = hashes->out_buf;
int out_len = hash_encode (hashcat_ctx->hashconfig, hashcat_ctx->hashes, hashcat_ctx->module_ctx, (char *) out_buf, HCBUFSIZ_LARGE, salt_pos, digest_pos);
int out_len = hash_encode (hashconfig, hashes, module_ctx, (char *) out_buf, HCBUFSIZ_LARGE, salt_pos, digest_pos);
out_buf[out_len] = 0;
// plain
u8 plain_buf[0x1000]; // while the password itself can have only length 256, the module could encode it with something like base64 which inflates the requires buffer size
memset (plain_buf, 0, sizeof (plain_buf));
u8 plain_buf[HCBUFSIZ_TINY] = { 0 }; // while the password itself can have only length 256, the module could encode it with something like base64 which inflates the requires buffer size
u8 postprocess_buf[HCBUFSIZ_TINY] = { 0 };
u8 *plain_ptr = plain_buf;
@ -355,18 +394,27 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
if (module_ctx->module_build_plain_postprocess != MODULE_DEFAULT)
{
u8 temp_buf[0x1000];
if (hashconfig->opts_type & OPTS_TYPE_COPY_TMPS)
{
if (device_param->is_cuda == true)
{
if (hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event3) == -1) return -1;
}
memset (temp_buf, 0, sizeof (temp_buf));
if (device_param->is_hip == true)
{
if (hc_hipEventSynchronize (hashcat_ctx, device_param->hip_event3) == -1) return -1;
}
const int temp_len = module_ctx->module_build_plain_postprocess (hashcat_ctx->hashconfig, hashcat_ctx->hashes, tmps, (u32 *) plain_buf, sizeof (plain_buf), plain_len, (u32 *)temp_buf, sizeof (temp_buf));
if (device_param->is_opencl == true)
{
if (hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event) == -1) return -1;
}
}
if (temp_len < (int) sizeof (plain_buf))
{
memcpy (plain_buf, temp_buf, temp_len);
plain_len = module_ctx->module_build_plain_postprocess (hashconfig, hashes, tmps, (u32 *) plain_buf, sizeof (plain_buf), plain_len, (u32 *) postprocess_buf, sizeof (postprocess_buf));
plain_len = temp_len;
}
plain_ptr = postprocess_buf;
}
// crackpos
@ -407,6 +455,24 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
if (module_ctx->module_hash_encode_potfile != MODULE_DEFAULT)
{
if (hashconfig->opts_type & OPTS_TYPE_COPY_TMPS)
{
if (device_param->is_cuda == true)
{
if (hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event3) == -1) return -1;
}
if (device_param->is_hip == true)
{
if (hc_hipEventSynchronize (hashcat_ctx, device_param->hip_event3) == -1) return -1;
}
if (device_param->is_opencl == true)
{
if (hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event) == -1) return -1;
}
}
salt_t *salts_buf = hashes->salts_buf;
salts_buf += salt_pos;
@ -471,7 +537,14 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
if (hashconfig->opts_type & OPTS_TYPE_COPY_TMPS)
{
hcfree (tmps);
if (device_param->is_opencl == true)
{
if (hc_clReleaseEvent (hashcat_ctx, opencl_event) == -1) return -1;
}
}
return 0;
}
//int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 salt_pos)
@ -485,172 +558,204 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
u32 num_cracked = 0;
int CU_rc;
int HIP_rc;
int CL_rc;
int rc = -1;
if (device_param->is_cuda == true)
{
CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32));
if (hc_cuMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32), device_param->cuda_stream) == -1) return -1;
if (CU_rc == -1) return -1;
if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
}
if (device_param->is_hip == true)
{
HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32));
if (hc_hipMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32), device_param->hip_stream) == -1) return -1;
if (HIP_rc == -1) return -1;
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
}
if (device_param->is_opencl == true)
{
CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
if (CL_rc == -1) return -1;
/* blocking */
if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL) == -1) return -1;
}
if (user_options->speed_only == true)
if (num_cracked == 0 || user_options->speed_only == true)
{
// we want the hc_clEnqueueReadBuffer to run in benchmark mode because it has an influence in performance
// we want to get the num_cracked in benchmark mode because it has an influence in performance
// however if the benchmark cracks the artificial hash used for benchmarks we don't want to see that!
return 0;
}
if (num_cracked)
plain_t *cracked = (plain_t *) hcmalloc (num_cracked * sizeof (plain_t));
if (device_param->is_cuda == true)
{
plain_t *cracked = (plain_t *) hccalloc (num_cracked, sizeof (plain_t));
rc = hc_cuMemcpyDtoHAsync (hashcat_ctx, cracked, device_param->cuda_d_plain_bufs, num_cracked * sizeof (plain_t), device_param->cuda_stream);
if (device_param->is_cuda == true)
if (rc == 0)
{
rc = hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream);
}
if (rc == -1)
{
CU_rc = hc_cuMemcpyDtoH (hashcat_ctx, cracked, device_param->cuda_d_plain_bufs, num_cracked * sizeof (plain_t));
hcfree (cracked);
if (CU_rc == -1) return -1;
return -1;
}
}
if (device_param->is_hip == true)
if (device_param->is_hip == true)
{
rc = hc_hipMemcpyDtoHAsync (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t), device_param->hip_stream);
if (rc == 0)
{
HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t));
rc = hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream);
}
if (HIP_rc == -1) return -1;
if (rc == -1)
{
hcfree (cracked);
return -1;
}
}
if (device_param->is_opencl == true)
if (device_param->is_opencl == true)
{
/* blocking */
rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
if (rc == -1)
{
CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
hcfree (cracked);
if (CL_rc == -1) return -1;
return -1;
}
}
u32 cpt_cracked = 0;
u32 cpt_cracked = 0;
hc_thread_mutex_lock (status_ctx->mux_display);
hc_thread_mutex_lock (status_ctx->mux_display);
for (u32 i = 0; i < num_cracked; i++)
{
const u32 hash_pos = cracked[i].hash_pos;
for (u32 i = 0; i < num_cracked; i++)
{
const u32 hash_pos = cracked[i].hash_pos;
if (hashes->digests_shown[hash_pos] == 1) continue;
if (hashes->digests_shown[hash_pos] == 1) continue;
const u32 salt_pos = cracked[i].salt_pos;
salt_t *salt_buf = &hashes->salts_buf[salt_pos];
const u32 salt_pos = cracked[i].salt_pos;
salt_t *salt_buf = &hashes->salts_buf[salt_pos];
if ((hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK) == 0)
{
hashes->digests_shown[hash_pos] = 1;
if ((hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK) == 0)
{
hashes->digests_shown[hash_pos] = 1;
hashes->digests_done++;
hashes->digests_done++;
cpt_cracked++;
cpt_cracked++;
salt_buf->digests_done++;
salt_buf->digests_done++;
if (salt_buf->digests_done == salt_buf->digests_cnt)
{
hashes->salts_shown[salt_pos] = 1;
if (salt_buf->digests_done == salt_buf->digests_cnt)
{
hashes->salts_shown[salt_pos] = 1;
hashes->salts_done++;
}
hashes->salts_done++;
}
}
if (hashes->salts_done == hashes->salts_cnt) mycracked (hashcat_ctx);
if (hashes->salts_done == hashes->salts_cnt) mycracked (hashcat_ctx);
check_hash (hashcat_ctx, device_param, &cracked[i]);
rc = check_hash (hashcat_ctx, device_param, &cracked[i]);
if (hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK)
{
// we need to reset cracked state on the device
// otherwise host thinks again and again the hash was cracked
// and returns invalid password each time
if (rc == -1)
{
break;
}
memset (hashes->digests_shown_tmp, 0, salt_buf->digests_cnt * sizeof (u32));
if (hashconfig->opts_type & OPTS_TYPE_PT_NEVERCRACK)
{
// we need to reset cracked state on the device
// otherwise host thinks again and again the hash was cracked
// and returns invalid password each time
if (device_param->is_cuda == true)
{
CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), &hashes->digests_shown_tmp[salt_buf->digests_offset], salt_buf->digests_cnt * sizeof (u32));
if (device_param->is_cuda == true)
{
rc = run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), salt_buf->digests_cnt * sizeof (u32));
if (CU_rc == -1) return -1;
if (rc == -1)
{
break;
}
}
if (device_param->is_hip == true)
{
HIP_rc = hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), &hashes->digests_shown_tmp[salt_buf->digests_offset], salt_buf->digests_cnt * sizeof (u32));
if (device_param->is_hip == true)
{
rc = run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), salt_buf->digests_cnt * sizeof (u32));
if (HIP_rc == -1) return -1;
if (rc == -1)
{
break;
}
}
if (device_param->is_opencl == true)
{
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
if (device_param->is_opencl == true)
{
/* NOTE: run_opencl_kernel_bzero() does not handle buffer offset */
rc = run_opencl_kernel_memset32 (hashcat_ctx, device_param, device_param->opencl_d_digests_shown, salt_buf->digests_offset * sizeof (u32), 0, salt_buf->digests_cnt * sizeof (u32));
if (CL_rc == -1) return -1;
if (rc == -1)
{
break;
}
}
}
}
hc_thread_mutex_unlock (status_ctx->mux_display);
hcfree (cracked);
if (cpt_cracked > 0)
{
hc_thread_mutex_lock (status_ctx->mux_display);
hc_thread_mutex_unlock (status_ctx->mux_display);
cpt_ctx->cpt_buf[cpt_ctx->cpt_pos].timestamp = time (NULL);
cpt_ctx->cpt_buf[cpt_ctx->cpt_pos].cracked = cpt_cracked;
hcfree (cracked);
cpt_ctx->cpt_pos++;
if (rc == -1)
{
return -1;
}
cpt_ctx->cpt_total += cpt_cracked;
if (cpt_cracked > 0)
{
hc_thread_mutex_lock (status_ctx->mux_display);
if (cpt_ctx->cpt_pos == CPT_CACHE) cpt_ctx->cpt_pos = 0;
cpt_ctx->cpt_buf[cpt_ctx->cpt_pos].timestamp = time (NULL);
cpt_ctx->cpt_buf[cpt_ctx->cpt_pos].cracked = cpt_cracked;
hc_thread_mutex_unlock (status_ctx->mux_display);
}
cpt_ctx->cpt_pos++;
num_cracked = 0;
cpt_ctx->cpt_total += cpt_cracked;
if (device_param->is_cuda == true)
{
CU_rc = hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_result, &num_cracked, sizeof (u32));
if (cpt_ctx->cpt_pos == CPT_CACHE) cpt_ctx->cpt_pos = 0;
if (CU_rc == -1) return -1;
}
hc_thread_mutex_unlock (status_ctx->mux_display);
}
if (device_param->is_hip == true)
{
HIP_rc = hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_result, &num_cracked, sizeof (u32));
if (device_param->is_cuda == true)
{
if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, sizeof (u32)) == -1) return -1;
}
if (HIP_rc == -1) return -1;
}
if (device_param->is_hip == true)
{
if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, sizeof (u32)) == -1) return -1;
}
if (device_param->is_opencl == true)
{
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
if (device_param->is_opencl == true)
{
if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_result, sizeof (u32)) == -1) return -1;
if (CL_rc == -1) return -1;
}
if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
}
return 0;
@ -1569,7 +1674,6 @@ int hashes_init_stage2 (hashcat_ctx_t *hashcat_ctx)
u32 digests_done = 0;
u32 *digests_shown = (u32 *) hccalloc (digests_cnt, sizeof (u32));
u32 *digests_shown_tmp = (u32 *) hccalloc (digests_cnt, sizeof (u32));
u32 salts_cnt = 0;
u32 salts_done = 0;
@ -1706,7 +1810,6 @@ int hashes_init_stage2 (hashcat_ctx_t *hashcat_ctx)
hashes->digests_done = digests_done;
hashes->digests_buf = digests_buf_new;
hashes->digests_shown = digests_shown;
hashes->digests_shown_tmp = digests_shown_tmp;
hashes->salts_cnt = salts_cnt;
hashes->salts_done = salts_done;
@ -2193,7 +2296,6 @@ void hashes_destroy (hashcat_ctx_t *hashcat_ctx)
hcfree (hashes->digests_buf);
hcfree (hashes->digests_shown);
hcfree (hashes->digests_shown_tmp);
hcfree (hashes->salts_buf);
hcfree (hashes->salts_shown);

@ -682,7 +682,21 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
return PMActivity.iCurrentBusLanes;
}
// NO OD8
if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8)
{
ADLPMLogDataOutput odlpDataOutput;
memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput));
if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1)
{
hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false;
return -1;
}
return odlpDataOutput.sensors[PMLOG_BUS_LANES].value;
}
}
if (hwmon_ctx->hm_sysfs_amdgpu)

@ -150,6 +150,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
CHECK_DEFINED (module_ctx->module_esalt_size);
CHECK_DEFINED (module_ctx->module_extra_buffer_size);
CHECK_DEFINED (module_ctx->module_extra_tmp_size);
CHECK_DEFINED (module_ctx->module_extra_tuningdb_block);
CHECK_DEFINED (module_ctx->module_forced_outfile_format);
CHECK_DEFINED (module_ctx->module_hash_binary_count);
CHECK_DEFINED (module_ctx->module_hash_binary_parse);

@ -138,6 +138,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -161,6 +161,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -164,6 +164,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -156,6 +156,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -160,6 +160,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -155,6 +155,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -268,6 +268,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -170,6 +170,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -154,6 +154,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -162,6 +162,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -161,6 +161,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -140,6 +140,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -140,6 +140,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -136,6 +136,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -154,6 +154,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -158,6 +158,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -180,6 +180,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -184,6 +184,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -181,6 +181,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -180,6 +180,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -181,6 +181,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -160,6 +160,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -179,6 +179,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -162,6 +162,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -181,6 +181,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -176,6 +176,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -170,6 +170,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -153,6 +153,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -181,6 +181,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -188,6 +188,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -158,6 +158,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -158,6 +158,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -155,6 +155,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -122,6 +122,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -153,6 +153,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -249,6 +249,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -262,6 +262,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -341,6 +341,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -127,6 +127,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -138,6 +138,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -141,6 +141,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -165,6 +165,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -168,6 +168,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -175,6 +175,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -201,6 +201,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -197,6 +197,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -201,6 +201,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -177,6 +177,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -202,6 +202,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -202,6 +202,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

@ -203,6 +203,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_esalt_size = MODULE_DEFAULT;
module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
module_ctx->module_hash_binary_count = MODULE_DEFAULT;
module_ctx->module_hash_binary_parse = MODULE_DEFAULT;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save