Move kernel-code for -L to standalone files with -pure suffix

pull/1288/head
jsteube 7 years ago
parent 194af74e91
commit 52c1e15f3f

@ -0,0 +1,70 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_rp.h"
#include "inc_rp.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
__kernel void m00000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
}
}
__kernel void m00000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
}
}

@ -0,0 +1,110 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_scalar.cl"
#include "inc_hash_md5.cl"
__kernel void m00000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
md5_ctx_t ctx_outer;
md5_init (&ctx_outer);
md5_update_global (&ctx_outer, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx_inner = ctx_outer;
md5_update_global (&ctx_inner, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx_inner);
const u32 a = ctx_inner.h[0];
const u32 b = ctx_inner.h[1];
const u32 c = ctx_inner.h[2];
const u32 d = ctx_inner.h[3];
COMPARE_M_SCALAR (a, d, c, b);
}
}
__kernel void m00000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
md5_ctx_t ctx_outer;
md5_init (&ctx_outer);
md5_update_global (&ctx_outer, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx_inner = ctx_outer;
md5_update_global (&ctx_inner, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx_inner);
const u32 a = ctx_inner.h[0];
const u32 b = ctx_inner.h[1];
const u32 c = ctx_inner.h[2];
const u32 d = ctx_inner.h[3];
COMPARE_S_SCALAR (a, d, c, b);
}
}

@ -206,48 +206,6 @@ __kernel void m00000_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
{
}
__kernel void m00000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
md5_ctx_t ctx_outer;
md5_init (&ctx_outer);
md5_update_global (&ctx_outer, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx_inner = ctx_outer;
md5_update_global (&ctx_inner, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx_inner);
const u32 a = ctx_inner.h[0];
const u32 b = ctx_inner.h[1];
const u32 c = ctx_inner.h[2];
const u32 d = ctx_inner.h[3];
COMPARE_M_SCALAR (a, d, c, b);
}
}
__kernel void m00000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
@ -454,57 +412,3 @@ __kernel void m00000_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
__kernel void m00000_s16 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
}
__kernel void m00000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
md5_ctx_t ctx_outer;
md5_init (&ctx_outer);
md5_update_global (&ctx_outer, pws[gid].i, pws[gid].pw_len);
/**
* loop
*/
for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
{
md5_ctx_t ctx_inner = ctx_outer;
md5_update_global (&ctx_inner, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
md5_final (&ctx_inner);
const u32 a = ctx_inner.h[0];
const u32 b = ctx_inner.h[1];
const u32 c = ctx_inner.h[2];
const u32 d = ctx_inner.h[3];
COMPARE_S_SCALAR (a, d, c, b);
}
}

@ -0,0 +1,144 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
__kernel void m00000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32x w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
md5_update_vector (&ctx, w, pw_len);
md5_final_vector (&ctx);
const u32x a = ctx.h[0];
const u32x b = ctx.h[1];
const u32x c = ctx.h[2];
const u32x d = ctx.h[3];
COMPARE_M_SIMD (a, d, c, b);
}
}
__kernel void m00000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32x w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
md5_update_vector (&ctx, w, pw_len);
md5_final_vector (&ctx);
const u32x a = ctx.h[0];
const u32x b = ctx.h[1];
const u32x c = ctx.h[2];
const u32x d = ctx.h[3];
COMPARE_S_SIMD (a, d, c, b);
}
}

@ -538,65 +538,6 @@ __kernel void m00000_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m00000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32x w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
md5_update_vector (&ctx, w, pw_len);
md5_final_vector (&ctx);
const u32x a = ctx.h[0];
const u32x b = ctx.h[1];
const u32x c = ctx.h[2];
const u32x d = ctx.h[3];
COMPARE_M_SIMD (a, d, c, b);
}
}
__kernel void m00000_s04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
@ -710,74 +651,3 @@ __kernel void m00000_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
}
__kernel void m00000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 lid = get_local_id (0);
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[digests_offset].digest_buf[DGST_R0],
digests_buf[digests_offset].digest_buf[DGST_R1],
digests_buf[digests_offset].digest_buf[DGST_R2],
digests_buf[digests_offset].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32x w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
md5_ctx_vector_t ctx;
md5_init_vector (&ctx);
md5_update_vector (&ctx, w, pw_len);
md5_final_vector (&ctx);
const u32x a = ctx.h[0];
const u32x b = ctx.h[1];
const u32x c = ctx.h[2];
const u32x d = ctx.h[3];
COMPARE_S_SIMD (a, d, c, b);
}
}

@ -0,0 +1,184 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
__kernel void m00400_init (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global phpass_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* init
*/
md5_ctx_t md5_ctx;
md5_init (&md5_ctx);
md5_update_global (&md5_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len);
md5_final (&md5_ctx);
u32 digest[4];
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
tmps[gid].digest_buf[2] = digest[2];
tmps[gid].digest_buf[3] = digest[3];
}
__kernel void m00400_loop (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global phpass_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* init
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32 w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
u32 digest[4];
digest[0] = tmps[gid].digest_buf[0];
digest[1] = tmps[gid].digest_buf[1];
digest[2] = tmps[gid].digest_buf[2];
digest[3] = tmps[gid].digest_buf[3];
/**
* loop
*/
md5_ctx_t md5_ctx;
md5_init (&md5_ctx);
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
md5_ctx.len = 16;
md5_update (&md5_ctx, w, pw_len);
md5_final (&md5_ctx);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
if ((16 + pw_len + 1) >= 56)
{
for (u32 i = 1; i < loop_cnt; i++)
{
md5_init (&md5_ctx);
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
md5_ctx.len = 16;
md5_update (&md5_ctx, w, pw_len);
md5_final (&md5_ctx);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
}
}
else
{
for (u32 i = 1; i < loop_cnt; i++)
{
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
digest[0] = MD5M_A;
digest[1] = MD5M_B;
digest[2] = MD5M_C;
digest[3] = MD5M_D;
md5_transform (md5_ctx.w0, md5_ctx.w1, md5_ctx.w2, md5_ctx.w3, digest);
}
}
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
tmps[gid].digest_buf[2] = digest[2];
tmps[gid].digest_buf[3] = digest[3];
}
__kernel void m00400_comp (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global phpass_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 gid = get_global_id (0);
const u32 lid = get_local_id (0);
if (gid >= gid_max) return;
/**
* digest
*/
const u32 r0 = tmps[gid].digest_buf[DGST_R0];
const u32 r1 = tmps[gid].digest_buf[DGST_R1];
const u32 r2 = tmps[gid].digest_buf[DGST_R2];
const u32 r3 = tmps[gid].digest_buf[DGST_R3];
#define il_pos 0
#include COMPARE_M
}

@ -3,7 +3,7 @@
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
@ -11,11 +11,206 @@
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_hash_md5.cl"
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
void md5_transform_S (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4])
{
u32 a = digest[0];
u32 b = digest[1];
u32 c = digest[2];
u32 d = digest[3];
u32 w0_t = w0[0];
u32 w1_t = w0[1];
u32 w2_t = w0[2];
u32 w3_t = w0[3];
u32 w4_t = w1[0];
u32 w5_t = w1[1];
u32 w6_t = w1[2];
u32 w7_t = w1[3];
u32 w8_t = w2[0];
u32 w9_t = w2[1];
u32 wa_t = w2[2];
u32 wb_t = w2[3];
u32 wc_t = w3[0];
u32 wd_t = w3[1];
u32 we_t = w3[2];
u32 wf_t = 0;
MD5_STEP_S (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP_S (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP_S (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP_S (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP_S (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP_S (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP_S (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP_S (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP_S (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP_S (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP_S (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP_S (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP_S (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP_S (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP_S (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP_S (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP_S (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP_S (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP_S (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP_S (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
}
void md5_transform_V (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4])
{
u32x a = digest[0];
u32x b = digest[1];
u32x c = digest[2];
u32x d = digest[3];
u32x w0_t = w0[0];
u32x w1_t = w0[1];
u32x w2_t = w0[2];
u32x w3_t = w0[3];
u32x w4_t = w1[0];
u32x w5_t = w1[1];
u32x w6_t = w1[2];
u32x w7_t = w1[3];
u32x w8_t = w2[0];
u32x w9_t = w2[1];
u32x wa_t = w2[2];
u32x wb_t = w2[3];
u32x wc_t = w3[0];
u32x wd_t = w3[1];
u32x we_t = w3[2];
u32x wf_t = 0;
MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03);
MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00);
MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01);
MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02);
MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03);
MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13);
MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10);
MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11);
MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12);
MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13);
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23);
MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20);
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21);
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22);
MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23);
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33);
MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30);
MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31);
MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32);
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33);
digest[0] += a;
digest[1] += b;
digest[2] += c;
digest[3] += d;
}
__kernel void m00400_init (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global phpass_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
@ -26,26 +221,86 @@ __kernel void m00400_init (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
u32 w0[4];
w0[0] = pws[gid].i[ 0];
w0[1] = pws[gid].i[ 1];
w0[2] = pws[gid].i[ 2];
w0[3] = pws[gid].i[ 3];
u32 w1[4];
w1[0] = pws[gid].i[ 4];
w1[1] = pws[gid].i[ 5];
w1[2] = pws[gid].i[ 6];
w1[3] = pws[gid].i[ 7];
u32 w2[4];
w2[0] = pws[gid].i[ 8];
w2[1] = pws[gid].i[ 9];
w2[2] = 0;
w2[3] = 0;
const u32 pw_len = pws[gid].pw_len;
/**
* salt
*/
u32 salt_buf[2];
salt_buf[0] = salt_bufs[salt_pos].salt_buf[0];
salt_buf[1] = salt_bufs[salt_pos].salt_buf[1];
/**
* init
*/
md5_ctx_t md5_ctx;
u32 block_len = 8 + pw_len;
u32 block0[4];
block0[0] = salt_buf[0];
block0[1] = salt_buf[1];
block0[2] = w0[0];
block0[3] = w0[1];
u32 block1[4];
md5_init (&md5_ctx);
block1[0] = w0[2];
block1[1] = w0[3];
block1[2] = w1[0];
block1[3] = w1[1];
md5_update_global (&md5_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len);
u32 block2[4];
md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len);
block2[0] = w1[2];
block2[1] = w1[3];
block2[2] = w2[0];
block2[3] = w2[1];
md5_final (&md5_ctx);
u32 block3[4];
block3[0] = 0;
block3[1] = 0;
block3[2] = block_len * 8;
block3[3] = 0;
append_0x80_4x4_S (block0, block1, block2, block3, block_len);
/**
* init
*/
u32 digest[4];
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
digest[0] = MD5M_A;
digest[1] = MD5M_B;
digest[2] = MD5M_C;
digest[3] = MD5M_D;
md5_transform_S (block0, block1, block2, block3, digest);
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
@ -61,101 +316,87 @@ __kernel void m00400_loop (__global pw_t *pws, __global const kernel_rule_t *rul
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* init
*/
if ((gid * VECT_SIZE) >= gid_max) return;
const u32 pw_len = pws[gid].pw_len;
u32x w0[4];
u32x w1[4];
u32x w2[4];
const u32 pw_lenv = ceil ((float) pw_len / 4);
w0[0] = packv (pws, i, gid, 0);
w0[1] = packv (pws, i, gid, 1);
w0[2] = packv (pws, i, gid, 2);
w0[3] = packv (pws, i, gid, 3);
w1[0] = packv (pws, i, gid, 4);
w1[1] = packv (pws, i, gid, 5);
w1[2] = packv (pws, i, gid, 6);
w1[3] = packv (pws, i, gid, 7);
w2[0] = packv (pws, i, gid, 8);
w2[1] = packv (pws, i, gid, 9);
w2[2] = 0;
w2[3] = 0;
u32 w[64] = { 0 };
u32x pw_len = packvf (pws, pw_len, gid);
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
u32x digest[4];
barrier (CLK_GLOBAL_MEM_FENCE);
}
u32 digest[4];
digest[0] = tmps[gid].digest_buf[0];
digest[1] = tmps[gid].digest_buf[1];
digest[2] = tmps[gid].digest_buf[2];
digest[3] = tmps[gid].digest_buf[3];
digest[0] = packv (tmps, digest_buf, gid, 0);
digest[1] = packv (tmps, digest_buf, gid, 1);
digest[2] = packv (tmps, digest_buf, gid, 2);
digest[3] = packv (tmps, digest_buf, gid, 3);
/**
* loop
*/
md5_ctx_t md5_ctx;
md5_init (&md5_ctx);
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
md5_ctx.len = 16;
md5_update (&md5_ctx, w, pw_len);
u32x block_len = (16 + pw_len);
u32x block0[4];
u32x block1[4];
u32x block2[4];
u32x block3[4];
block0[0] = 0;
block0[1] = 0;
block0[2] = 0;
block0[3] = 0;
block1[0] = w0[0];
block1[1] = w0[1];
block1[2] = w0[2];
block1[3] = w0[3];
block2[0] = w1[0];
block2[1] = w1[1];
block2[2] = w1[2];
block2[3] = w1[3];
block3[0] = w2[0];
block3[1] = w2[1];
block3[2] = block_len * 8;
block3[3] = 0;
append_0x80_4x4_VV (block0, block1, block2, block3, block_len);
md5_final (&md5_ctx);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
/**
* init
*/
if ((16 + pw_len + 1) >= 56)
for (u32 i = 0; i < loop_cnt; i++)
{
for (u32 i = 1; i < loop_cnt; i++)
{
md5_init (&md5_ctx);
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
md5_ctx.len = 16;
md5_update (&md5_ctx, w, pw_len);
block0[0] = digest[0];
block0[1] = digest[1];
block0[2] = digest[2];
block0[3] = digest[3];
md5_final (&md5_ctx);
digest[0] = MD5M_A;
digest[1] = MD5M_B;
digest[2] = MD5M_C;
digest[3] = MD5M_D;
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
}
}
else
{
for (u32 i = 1; i < loop_cnt; i++)
{
md5_ctx.w0[0] = digest[0];
md5_ctx.w0[1] = digest[1];
md5_ctx.w0[2] = digest[2];
md5_ctx.w0[3] = digest[3];
digest[0] = MD5M_A;
digest[1] = MD5M_B;
digest[2] = MD5M_C;
digest[3] = MD5M_D;
md5_transform (md5_ctx.w0, md5_ctx.w1, md5_ctx.w2, md5_ctx.w3, digest);
}
md5_transform_V (block0, block1, block2, block3, digest);
}
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
tmps[gid].digest_buf[2] = digest[2];
tmps[gid].digest_buf[3] = digest[3];
unpackv (tmps, digest_buf, gid, 0, digest[0]);
unpackv (tmps, digest_buf, gid, 1, digest[1]);
unpackv (tmps, digest_buf, gid, 2, digest[2]);
unpackv (tmps, digest_buf, gid, 3, digest[3]);
}
__kernel void m00400_comp (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global phpass_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
@ -181,4 +422,4 @@ __kernel void m00400_comp (__global pw_t *pws, __global const kernel_rule_t *rul
#define il_pos 0
#include COMPARE_M
}
}

@ -1,888 +0,0 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_hash_md5.cl"
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#define md5crypt_magic 0x00243124u
void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
u32 tmp2;
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
tmp2 = tmp3;
tmp3 = tmp4;
tmp4 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
tmp1 = __byte_perm (append[0], append[1], selector);
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0, selector);
#endif
const u32 div = block_len / 4;
switch (div)
{
case 0: block0[0] |= tmp0;
block0[1] = tmp1;
block0[2] = tmp2;
block0[3] = tmp3;
block1[0] = tmp4;
break;
case 1: block0[1] |= tmp0;
block0[2] = tmp1;
block0[3] = tmp2;
block1[0] = tmp3;
block1[1] = tmp4;
break;
case 2: block0[2] |= tmp0;
block0[3] = tmp1;
block1[0] = tmp2;
block1[1] = tmp3;
block1[2] = tmp4;
break;
case 3: block0[3] |= tmp0;
block1[0] = tmp1;
block1[1] = tmp2;
block1[2] = tmp3;
block1[3] = tmp4;
break;
case 4: block1[0] |= tmp0;
block1[1] = tmp1;
block1[2] = tmp2;
block1[3] = tmp3;
block2[0] = tmp4;
break;
case 5: block1[1] |= tmp0;
block1[2] = tmp1;
block1[3] = tmp2;
block2[0] = tmp3;
block2[1] = tmp4;
break;
case 6: block1[2] |= tmp0;
block1[3] = tmp1;
block2[0] = tmp2;
block2[1] = tmp3;
block2[2] = tmp4;
break;
case 7: block1[3] |= tmp0;
block2[0] = tmp1;
block2[1] = tmp2;
block2[2] = tmp3;
block2[3] = tmp4;
break;
case 8: block2[0] |= tmp0;
block2[1] = tmp1;
block2[2] = tmp2;
block2[3] = tmp3;
block3[0] = tmp4;
break;
case 9: block2[1] |= tmp0;
block2[2] = tmp1;
block2[3] = tmp2;
block3[0] = tmp3;
block3[1] = tmp4;
break;
}
}
void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
u32 tmp2;
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0x80, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
tmp2 = tmp3;
tmp3 = tmp4;
tmp4 = 0x80;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
tmp1 = __byte_perm (append[0], append[1], selector);
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0x80, selector);
#endif
const u32 div = block_len / 4;
switch (div)
{
case 0: block0[0] |= tmp0;
block0[1] = tmp1;
block0[2] = tmp2;
block0[3] = tmp3;
block1[0] = tmp4;
break;
case 1: block0[1] |= tmp0;
block0[2] = tmp1;
block0[3] = tmp2;
block1[0] = tmp3;
block1[1] = tmp4;
break;
case 2: block0[2] |= tmp0;
block0[3] = tmp1;
block1[0] = tmp2;
block1[1] = tmp3;
block1[2] = tmp4;
break;
case 3: block0[3] |= tmp0;
block1[0] = tmp1;
block1[1] = tmp2;
block1[2] = tmp3;
block1[3] = tmp4;
break;
case 4: block1[0] |= tmp0;
block1[1] = tmp1;
block1[2] = tmp2;
block1[3] = tmp3;
block2[0] = tmp4;
break;
case 5: block1[1] |= tmp0;
block1[2] = tmp1;
block1[3] = tmp2;
block2[0] = tmp3;
block2[1] = tmp4;
break;
case 6: block1[2] |= tmp0;
block1[3] = tmp1;
block2[0] = tmp2;
block2[1] = tmp3;
block2[2] = tmp4;
break;
case 7: block1[3] |= tmp0;
block2[0] = tmp1;
block2[1] = tmp2;
block2[2] = tmp3;
block2[3] = tmp4;
break;
case 8: block2[0] |= tmp0;
block2[1] = tmp1;
block2[2] = tmp2;
block2[3] = tmp3;
block3[0] = tmp4;
break;
case 9: block2[1] |= tmp0;
block2[2] = tmp1;
block2[3] = tmp2;
block3[0] = tmp3;
block3[1] = tmp4;
break;
}
}
void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
{
u32 tmp0;
u32 tmp1;
u32 tmp2;
#if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - (block_len & 3);
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign ( 0, append[1], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
tmp2 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
tmp1 = __byte_perm (append[0], append[1], selector);
tmp2 = __byte_perm (append[1], 0, selector);
#endif
const u32 div = block_len / 4;
switch (div)
{
case 0: block0[0] |= tmp0;
block0[1] = tmp1;
block0[2] = tmp2;
break;
case 1: block0[1] |= tmp0;
block0[2] = tmp1;
block0[3] = tmp2;
break;
case 2: block0[2] |= tmp0;
block0[3] = tmp1;
block1[0] = tmp2;
break;
case 3: block0[3] |= tmp0;
block1[0] = tmp1;
block1[1] = tmp2;
break;
case 4: block1[0] |= tmp0;
block1[1] = tmp1;
block1[2] = tmp2;
break;
case 5: block1[1] |= tmp0;
block1[2] = tmp1;
block1[3] = tmp2;
break;
case 6: block1[2] |= tmp0;
block1[3] = tmp1;
block2[0] = tmp2;
break;
case 7: block1[3] |= tmp0;
block2[0] = tmp1;
block2[1] = tmp2;
break;
case 8: block2[0] |= tmp0;
block2[1] = tmp1;
block2[2] = tmp2;
break;
case 9: block2[1] |= tmp0;
block2[2] = tmp1;
block2[3] = tmp2;
break;
case 10: block2[2] |= tmp0;
block2[3] = tmp1;
block3[0] = tmp2;
break;
case 11: block2[3] |= tmp0;
block3[0] = tmp1;
block3[1] = tmp2;
break;
}
}
void append_sign (u32 block0[4], u32 block1[4], const u32 block_len)
{
switch (block_len)
{
case 0:
block0[0] = md5crypt_magic;
break;
case 1:
block0[0] = block0[0] | md5crypt_magic << 8u;
block0[1] = md5crypt_magic >> 24u;
break;
case 2:
block0[0] = block0[0] | md5crypt_magic << 16u;
block0[1] = md5crypt_magic >> 16u;
break;
case 3:
block0[0] = block0[0] | md5crypt_magic << 24u;
block0[1] = md5crypt_magic >> 8u;
break;
case 4:
block0[1] = md5crypt_magic;
break;
case 5:
block0[1] = block0[1] | md5crypt_magic << 8u;
block0[2] = md5crypt_magic >> 24u;
break;
case 6:
block0[1] = block0[1] | md5crypt_magic << 16u;
block0[2] = md5crypt_magic >> 16u;
break;
case 7:
block0[1] = block0[1] | md5crypt_magic << 24u;
block0[2] = md5crypt_magic >> 8u;
break;
case 8:
block0[2] = md5crypt_magic;
break;
case 9:
block0[2] = block0[2] | md5crypt_magic << 8u;
block0[3] = md5crypt_magic >> 24u;
break;
case 10:
block0[2] = block0[2] | md5crypt_magic << 16u;
block0[3] = md5crypt_magic >> 16u;
break;
case 11:
block0[2] = block0[2] | md5crypt_magic << 24u;
block0[3] = md5crypt_magic >> 8u;
break;
case 12:
block0[3] = md5crypt_magic;
break;
case 13:
block0[3] = block0[3] | md5crypt_magic << 8u;
block1[0] = md5crypt_magic >> 24u;
break;
case 14:
block0[3] = block0[3] | md5crypt_magic << 16u;
block1[0] = md5crypt_magic >> 16u;
break;
case 15:
block0[3] = block0[3] | md5crypt_magic << 24u;
block1[0] = md5crypt_magic >> 8u;
break;
}
}
void append_1st (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append)
{
switch (block_len)
{
case 0:
block0[0] = append;
break;
case 1:
block0[0] = block0[0] | append << 8;
break;
case 2:
block0[0] = block0[0] | append << 16;
break;
case 3:
block0[0] = block0[0] | append << 24;
break;
case 4:
block0[1] = append;
break;
case 5:
block0[1] = block0[1] | append << 8;
break;
case 6:
block0[1] = block0[1] | append << 16;
break;
case 7:
block0[1] = block0[1] | append << 24;
break;
case 8:
block0[2] = append;
break;
case 9:
block0[2] = block0[2] | append << 8;
break;
case 10:
block0[2] = block0[2] | append << 16;
break;
case 11:
block0[2] = block0[2] | append << 24;
break;
case 12:
block0[3] = append;
break;
case 13:
block0[3] = block0[3] | append << 8;
break;
case 14:
block0[3] = block0[3] | append << 16;
break;
case 15:
block0[3] = block0[3] | append << 24;
break;
case 16:
block1[0] = append;
break;
case 17:
block1[0] = block1[0] | append << 8;
break;
case 18:
block1[0] = block1[0] | append << 16;
break;
case 19:
block1[0] = block1[0] | append << 24;
break;
case 20:
block1[1] = append;
break;
case 21:
block1[1] = block1[1] | append << 8;
break;
case 22:
block1[1] = block1[1] | append << 16;
break;
case 23:
block1[1] = block1[1] | append << 24;
break;
case 24:
block1[2] = append;
break;
case 25:
block1[2] = block1[2] | append << 8;
break;
case 26:
block1[2] = block1[2] | append << 16;
break;
case 27:
block1[2] = block1[2] | append << 24;
break;
case 28:
block1[3] = append;
break;
case 29:
block1[3] = block1[3] | append << 8;
break;
case 30:
block1[3] = block1[3] | append << 16;
break;
case 31:
block1[3] = block1[3] | append << 24;
break;
case 32:
block2[0] = append;
break;
case 33:
block2[0] = block2[0] | append << 8;
break;
case 34:
block2[0] = block2[0] | append << 16;
break;
case 35:
block2[0] = block2[0] | append << 24;
break;
case 36:
block2[1] = append;
break;
case 37:
block2[1] = block2[1] | append << 8;
break;
case 38:
block2[1] = block2[1] | append << 16;
break;
case 39:
block2[1] = block2[1] | append << 24;
break;
case 40:
block2[2] = append;
break;
case 41:
block2[2] = block2[2] | append << 8;
break;
case 42:
block2[2] = block2[2] | append << 16;
break;
case 43:
block2[2] = block2[2] | append << 24;
break;
case 44:
block2[3] = append;
break;
case 45:
block2[3] = block2[3] | append << 8;
break;
case 46:
block2[3] = block2[3] | append << 16;
break;
case 47:
block2[3] = block2[3] | append << 24;
break;
case 48:
block3[0] = append;
break;
case 49:
block3[0] = block3[0] | append << 8;
break;
case 50:
block3[0] = block3[0] | append << 16;
break;
case 51:
block3[0] = block3[0] | append << 24;
break;
case 52:
block3[1] = append;
break;
case 53:
block3[1] = block3[1] | append << 8;
break;
case 54:
block3[1] = block3[1] | append << 16;
break;
case 55:
block3[1] = block3[1] | append << 24;
break;
case 56:
block3[2] = append;
break;
}
}
__kernel void m00500_init (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* init
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32 w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32 salt_lenv = ceil ((float) salt_len / 4);
u32 s[64] = { 0 };
for (int idx = 0; idx < salt_lenv; idx++)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* prepare
*/
md5_ctx_t md5_ctx1;
md5_init (&md5_ctx1);
md5_update (&md5_ctx1, w, pw_len);
md5_update (&md5_ctx1, s, salt_len);
md5_update (&md5_ctx1, w, pw_len);
md5_final (&md5_ctx1);
u32 final[16] = { 0 };
final[0] = md5_ctx1.h[0];
final[1] = md5_ctx1.h[1];
final[2] = md5_ctx1.h[2];
final[3] = md5_ctx1.h[3];
if (pw_len < 16)
{
truncate_block (final, pw_len);
}
md5_ctx_t md5_ctx;
md5_init (&md5_ctx);
md5_update (&md5_ctx, w, pw_len);
u32 m[16] = { 0 };
m[0] = md5crypt_magic;
md5_update (&md5_ctx, m, 3);
md5_update (&md5_ctx, s, salt_len);
for (int pl = pw_len; pl > 0; pl -= 16)
{
md5_update (&md5_ctx, final, pl > 16 ? 16 : pl);
}
/* Then something really weird... */
u32 z[16] = { 0 };
for (int i = pw_len; i != 0; i >>= 1)
{
if (i & 1)
{
md5_update (&md5_ctx, z, 1);
}
else
{
md5_update (&md5_ctx, w, 1);
}
}
md5_final (&md5_ctx);
tmps[gid].digest_buf[0] = md5_ctx.h[0];
tmps[gid].digest_buf[1] = md5_ctx.h[1];
tmps[gid].digest_buf[2] = md5_ctx.h[2];
tmps[gid].digest_buf[3] = md5_ctx.h[3];
}
__kernel void m00500_loop (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* init
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32 w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32 salt_lenv = ceil ((float) salt_len / 4);
u32 s[64] = { 0 };
for (int idx = 0; idx < salt_lenv; idx++)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* digest
*/
u32 digest[16] = { 0 };
digest[0] = tmps[gid].digest_buf[0];
digest[1] = tmps[gid].digest_buf[1];
digest[2] = tmps[gid].digest_buf[2];
digest[3] = tmps[gid].digest_buf[3];
/**
* loop
*/
for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
{
md5_ctx_t md5_ctx;
md5_init (&md5_ctx);
if (j & 1)
{
md5_update (&md5_ctx, w, pw_len);
}
else
{
md5_update (&md5_ctx, digest, 16);
}
if (j % 3)
{
md5_update (&md5_ctx, s, salt_len);
}
if (j % 7)
{
md5_update (&md5_ctx, w, pw_len);
}
if (j & 1)
{
md5_update (&md5_ctx, digest, 16);
}
else
{
md5_update (&md5_ctx, w, pw_len);
}
md5_final (&md5_ctx);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
}
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
tmps[gid].digest_buf[2] = digest[2];
tmps[gid].digest_buf[3] = digest[3];
}
__kernel void m00500_comp (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
const u32 lid = get_local_id (0);
/**
* digest
*/
const u32 r0 = tmps[gid].digest_buf[DGST_R0];
const u32 r1 = tmps[gid].digest_buf[DGST_R1];
const u32 r2 = tmps[gid].digest_buf[DGST_R2];
const u32 r3 = tmps[gid].digest_buf[DGST_R3];
#define il_pos 0
#include COMPARE_M
}

@ -0,0 +1,261 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"
#include "inc_types.cl"
#include "inc_common.cl"
#include "inc_hash_md5.cl"
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"
#define md5crypt_magic 0x00243124u
__kernel void m00500_init (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* init
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32 w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32 salt_lenv = ceil ((float) salt_len / 4);
u32 s[64] = { 0 };
for (int idx = 0; idx < salt_lenv; idx++)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* prepare
*/
md5_ctx_t md5_ctx1;
md5_init (&md5_ctx1);
md5_update (&md5_ctx1, w, pw_len);
md5_update (&md5_ctx1, s, salt_len);
md5_update (&md5_ctx1, w, pw_len);
md5_final (&md5_ctx1);
u32 final[16] = { 0 };
final[0] = md5_ctx1.h[0];
final[1] = md5_ctx1.h[1];
final[2] = md5_ctx1.h[2];
final[3] = md5_ctx1.h[3];
md5_ctx_t md5_ctx;
md5_init (&md5_ctx);
md5_update (&md5_ctx, w, pw_len);
u32 m[16] = { 0 };
m[0] = md5crypt_magic;
md5_update (&md5_ctx, m, 3);
md5_update (&md5_ctx, s, salt_len);
int pl;
for (pl = pw_len; pl > 16; pl -= 16)
{
md5_update (&md5_ctx, final, 16);
}
truncate_block (final, pl);
md5_update (&md5_ctx, final, pl);
/* Then something really weird... */
for (int i = pw_len; i != 0; i >>= 1)
{
u32 t[16] = { 0 };
if (i & 1)
{
t[0] = 0;
}
else
{
t[0] = w[0] & 0xff;
}
md5_update (&md5_ctx, t, 1);
}
md5_final (&md5_ctx);
tmps[gid].digest_buf[0] = md5_ctx.h[0];
tmps[gid].digest_buf[1] = md5_ctx.h[1];
tmps[gid].digest_buf[2] = md5_ctx.h[2];
tmps[gid].digest_buf[3] = md5_ctx.h[3];
}
__kernel void m00500_loop (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* base
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
/**
* init
*/
const u32 pw_len = pws[gid].pw_len;
const u32 pw_lenv = ceil ((float) pw_len / 4);
u32 w[64] = { 0 };
for (int idx = 0; idx < pw_lenv; idx++)
{
w[idx] = pws[gid].i[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
const u32 salt_len = salt_bufs[salt_pos].salt_len;
const u32 salt_lenv = ceil ((float) salt_len / 4);
u32 s[64] = { 0 };
for (int idx = 0; idx < salt_lenv; idx++)
{
s[idx] = salt_bufs[salt_pos].salt_buf[idx];
barrier (CLK_GLOBAL_MEM_FENCE);
}
/**
* digest
*/
u32 digest[16] = { 0 };
digest[0] = tmps[gid].digest_buf[0];
digest[1] = tmps[gid].digest_buf[1];
digest[2] = tmps[gid].digest_buf[2];
digest[3] = tmps[gid].digest_buf[3];
/**
* loop
*/
for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
{
md5_ctx_t md5_ctx;
md5_init (&md5_ctx);
if (j & 1)
{
md5_update (&md5_ctx, w, pw_len);
}
else
{
md5_update (&md5_ctx, digest, 16);
}
if (j % 3)
{
md5_update (&md5_ctx, s, salt_len);
}
if (j % 7)
{
md5_update (&md5_ctx, w, pw_len);
}
if (j & 1)
{
md5_update (&md5_ctx, digest, 16);
}
else
{
md5_update (&md5_ctx, w, pw_len);
}
md5_final (&md5_ctx);
digest[0] = md5_ctx.h[0];
digest[1] = md5_ctx.h[1];
digest[2] = md5_ctx.h[2];
digest[3] = md5_ctx.h[3];
}
tmps[gid].digest_buf[0] = digest[0];
tmps[gid].digest_buf[1] = digest[1];
tmps[gid].digest_buf[2] = digest[2];
tmps[gid].digest_buf[3] = digest[3];
}
__kernel void m00500_comp (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global md5crypt_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)
{
/**
* modifier
*/
const u32 gid = get_global_id (0);
if (gid >= gid_max) return;
const u32 lid = get_local_id (0);
/**
* digest
*/
const u32 r0 = tmps[gid].digest_buf[DGST_R0];
const u32 r1 = tmps[gid].digest_buf[DGST_R1];
const u32 r2 = tmps[gid].digest_buf[DGST_R2];
const u32 r3 = tmps[gid].digest_buf[DGST_R3];
#define il_pos 0
#include COMPARE_M
}

@ -3,6 +3,8 @@
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#include "inc_vendor.cl"
#include "inc_hash_constants.h"
#include "inc_hash_functions.cl"

@ -20475,7 +20475,8 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
hashconfig->kern_type = KERN_TYPE_PHPASS;
hashconfig->dgst_size = DGST_SIZE_4_4;
hashconfig->parse_func = phpass_parse_hash;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE;
hashconfig->opti_type = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
hashconfig->dgst_pos0 = 0;
hashconfig->dgst_pos1 = 1;
hashconfig->dgst_pos2 = 2;
@ -24552,9 +24553,9 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
{
case 125: hashconfig->pw_max = 32;
break;
case 500: hashconfig->pw_max = 16;
case 500: hashconfig->pw_max = 15;
break;
case 1600: hashconfig->pw_max = 16;
case 1600: hashconfig->pw_max = 15;
break;
case 1800: hashconfig->pw_max = 16;
break;
@ -24564,7 +24565,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
break;
case 5800: hashconfig->pw_max = 16;
break;
case 6300: hashconfig->pw_max = 16;
case 6300: hashconfig->pw_max = 15;
break;
case 7000: hashconfig->pw_max = 19;
break;
@ -24597,16 +24598,14 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
case 15500: hashconfig->pw_max = 16;
break;
}
}
// fully converted to length 256
// pw_max : kernel fully compatible to length PW_MAX - those don't need to use --length-limit-disable
switch (hashconfig->hash_mode)
{
case 400: hashconfig->pw_max = 256;
break;
case 2100: hashconfig->pw_max = 256;
break;
}
switch (hashconfig->hash_mode)
{
case 2100: hashconfig->pw_max = PW_MAX;
break;
}
// pw_max : algo specific hard limits

@ -108,35 +108,75 @@ static int ocl_check_dri (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx)
return 0;
}
static void generate_source_kernel_filename (const u32 attack_exec, const u32 attack_kern, const u32 kern_type, char *shared_dir, char *source_file)
static void generate_source_kernel_filename (const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const bool length_limit_disable, char *shared_dir, char *source_file)
{
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
if (length_limit_disable == true)
{
if (attack_kern == ATTACK_KERN_STRAIGHT)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a0.cl", shared_dir, (int) kern_type);
else if (attack_kern == ATTACK_KERN_COMBI)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a1.cl", shared_dir, (int) kern_type);
else if (attack_kern == ATTACK_KERN_BF)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a3.cl", shared_dir, (int) kern_type);
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
if (attack_kern == ATTACK_KERN_STRAIGHT)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a0-pure.cl", shared_dir, (int) kern_type);
else if (attack_kern == ATTACK_KERN_COMBI)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a1-pure.cl", shared_dir, (int) kern_type);
else if (attack_kern == ATTACK_KERN_BF)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a3-pure.cl", shared_dir, (int) kern_type);
}
else
{
snprintf (source_file, 255, "%s/OpenCL/m%05d-pure.cl", shared_dir, (int) kern_type);
}
}
else
snprintf (source_file, 255, "%s/OpenCL/m%05d.cl", shared_dir, (int) kern_type);
{
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
if (attack_kern == ATTACK_KERN_STRAIGHT)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a0.cl", shared_dir, (int) kern_type);
else if (attack_kern == ATTACK_KERN_COMBI)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a1.cl", shared_dir, (int) kern_type);
else if (attack_kern == ATTACK_KERN_BF)
snprintf (source_file, 255, "%s/OpenCL/m%05d_a3.cl", shared_dir, (int) kern_type);
}
else
{
snprintf (source_file, 255, "%s/OpenCL/m%05d.cl", shared_dir, (int) kern_type);
}
}
}
static void generate_cached_kernel_filename (const u32 attack_exec, const u32 attack_kern, const u32 kern_type, char *profile_dir, const char *device_name_chksum, char *cached_file)
static void generate_cached_kernel_filename (const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const bool length_limit_disable, char *profile_dir, const char *device_name_chksum, char *cached_file)
{
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
if (length_limit_disable == true)
{
if (attack_kern == ATTACK_KERN_STRAIGHT)
snprintf (cached_file, 255, "%s/kernels/m%05d_a0.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
else if (attack_kern == ATTACK_KERN_COMBI)
snprintf (cached_file, 255, "%s/kernels/m%05d_a1.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
else if (attack_kern == ATTACK_KERN_BF)
snprintf (cached_file, 255, "%s/kernels/m%05d_a3.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
if (attack_kern == ATTACK_KERN_STRAIGHT)
snprintf (cached_file, 255, "%s/kernels/m%05d_a0-pure.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
else if (attack_kern == ATTACK_KERN_COMBI)
snprintf (cached_file, 255, "%s/kernels/m%05d_a1-pure.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
else if (attack_kern == ATTACK_KERN_BF)
snprintf (cached_file, 255, "%s/kernels/m%05d_a3-pure.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
}
else
{
snprintf (cached_file, 255, "%s/kernels/m%05d-pure.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
}
}
else
{
snprintf (cached_file, 255, "%s/kernels/m%05d.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
if (attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
if (attack_kern == ATTACK_KERN_STRAIGHT)
snprintf (cached_file, 255, "%s/kernels/m%05d_a0.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
else if (attack_kern == ATTACK_KERN_COMBI)
snprintf (cached_file, 255, "%s/kernels/m%05d_a1.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
else if (attack_kern == ATTACK_KERN_BF)
snprintf (cached_file, 255, "%s/kernels/m%05d_a3.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
}
else
{
snprintf (cached_file, 255, "%s/kernels/m%05d.%s.kernel", profile_dir, (int) kern_type, device_name_chksum);
}
}
}
@ -3672,6 +3712,23 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
vector_width = user_options->opencl_vector_width;
}
// We can't have SIMD in kernels where final password length depends on user input we can't precompute
if (user_options->length_limit_disable == true)
{
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
if ((user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) || (user_options_extra->attack_kern == ATTACK_KERN_COMBI))
{
vector_width = 1;
}
}
else
{
vector_width = 1;
}
}
if (vector_width > 16) vector_width = 16;
device_param->vector_width = vector_width;
@ -4179,7 +4236,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
char source_file[256] = { 0 };
generate_source_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, folder_config->shared_dir, source_file);
generate_source_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, user_options->length_limit_disable, folder_config->shared_dir, source_file);
if (hc_path_read (source_file) == false)
{
@ -4194,7 +4251,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
char cached_file[256] = { 0 };
generate_cached_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, folder_config->profile_dir, device_name_chksum, cached_file);
generate_cached_kernel_filename (hashconfig->attack_exec, user_options_extra->attack_kern, hashconfig->kern_type, user_options->length_limit_disable, folder_config->profile_dir, device_name_chksum, cached_file);
bool cached = true;
@ -4930,58 +4987,106 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH)
{
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_s%02d", hashconfig->kern_type, 4);
if (user_options->length_limit_disable == true)
{
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_sxx", hashconfig->kern_type);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel4);
if (CL_rc == -1) return -1;
if (CL_rc == -1) return -1;
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_s%02d", hashconfig->kern_type, 8);
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_threads_by_wgs_kernel4);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2);
if (CL_rc == -1) return -1;
}
else
{
// kernel1
if (CL_rc == -1) return -1;
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_s%02d", hashconfig->kern_type, 4);
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_s%02d", hashconfig->kern_type, 16);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3);
if (CL_rc == -1) return -1;
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_threads_by_wgs_kernel1);
if (user_options->length_limit_disable == true)
{
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_sxx", hashconfig->kern_type);
if (CL_rc == -1) return -1;
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel4);
// kernel2
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_s%02d", hashconfig->kern_type, 8);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2);
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_threads_by_wgs_kernel2);
if (CL_rc == -1) return -1;
// kernel3
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_s%02d", hashconfig->kern_type, 16);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3);
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_threads_by_wgs_kernel3);
if (CL_rc == -1) return -1;
}
}
else
{
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_m%02d", hashconfig->kern_type, 4);
if (user_options->length_limit_disable == true)
{
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_mxx", hashconfig->kern_type);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel4);
if (CL_rc == -1) return -1;
if (CL_rc == -1) return -1;
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_m%02d", hashconfig->kern_type, 8);
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_threads_by_wgs_kernel4);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2);
if (CL_rc == -1) return -1;
}
else
{
// kernel1
if (CL_rc == -1) return -1;
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_m%02d", hashconfig->kern_type, 4);
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_m%02d", hashconfig->kern_type, 16);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3);
if (CL_rc == -1) return -1;
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_threads_by_wgs_kernel1);
if (user_options->length_limit_disable == true)
{
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_mxx", hashconfig->kern_type);
if (CL_rc == -1) return -1;
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel4);
// kernel2
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_m%02d", hashconfig->kern_type, 8);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2);
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_threads_by_wgs_kernel2);
if (CL_rc == -1) return -1;
// kernel3
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_m%02d", hashconfig->kern_type, 16);
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3);
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_threads_by_wgs_kernel3);
if (CL_rc == -1) return -1;
}
@ -5013,6 +5118,10 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_threads_by_wgs_kernel1);
if (CL_rc == -1) return -1;
// kernel2
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_loop", hashconfig->kern_type);
@ -5021,6 +5130,10 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_threads_by_wgs_kernel2);
if (CL_rc == -1) return -1;
// kernel3
snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05u_comp", hashconfig->kern_type);
@ -5029,6 +5142,10 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
if (CL_rc == -1) return -1;
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_threads_by_wgs_kernel3);
if (CL_rc == -1) return -1;
// kernel12
if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
@ -5090,33 +5207,6 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
}
}
// kernel1
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_threads_by_wgs_kernel1);
if (CL_rc == -1) return -1;
// kernel2
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_threads_by_wgs_kernel2);
if (CL_rc == -1) return -1;
// kernel3
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_threads_by_wgs_kernel3);
if (CL_rc == -1) return -1;
// kernel4
if (user_options->length_limit_disable == true)
{
CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel4, &device_param->kernel_threads_by_wgs_kernel4);
if (CL_rc == -1) return -1;
}
// GPU memset
CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, "gpu_memset", &device_param->kernel_memset);

@ -50,10 +50,9 @@ my $MAX_LEN = 55;
my @modes = (0, 10, 11, 12, 20, 21, 22, 23, 30, 40, 50, 60, 100, 101, 110, 111, 112, 120, 121, 122, 125, 130, 131, 132, 133, 140, 141, 150, 160, 200, 300, 400, 500, 600, 900, 1000, 1100, 1300, 1400, 1410, 1411, 1420, 1430, 1440, 1441, 1450, 1460, 1500, 1600, 1700, 1710, 1711, 1720, 1730, 1740, 1722, 1731, 1750, 1760, 1800, 2100, 2400, 2410, 2500, 2600, 2611, 2612, 2711, 2811, 3000, 3100, 3200, 3710, 3711, 3300, 3500, 3610, 3720, 3800, 3910, 4010, 4110, 4210, 4300, 4400, 4500, 4520, 4521, 4522, 4600, 4700, 4800, 4900, 5000, 5100, 5300, 5400, 5500, 5600, 5700, 5800, 6000, 6100, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7700, 7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8900, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11900, 12000, 12001, 12100, 12200, 12300, 12400, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13800, 13900, 14000, 14100, 14400, 14700, 14800, 14900, 15000, 15100, 15200, 15300, 15400, 15500, 15600, 15700, 99999);
#my %is_utf16le = map { $_ => 1 } qw (30 40 130 131 132 133 140 141 1000 1100 1430 1440 1441 1730 1740 1731 2100 5500 5600 8000 9400 9500 9600 9700 9800 11600 13500 13800);
my %is_utf16le = map { $_ => 1 } qw (30 40 130 131 132 133 140 141 1000 1100 1430 1440 1441 1730 1740 1731 5500 5600 8000 9400 9500 9600 9700 9800 11600 13500 13800);
my %less_fifteen = map { $_ => 1 } qw (500 1600 1800 2400 2410 3200 6300 7400 10500 10700);
my %allow_long_salt = map { $_ => 1 } qw (400 2100 2500 4520 4521 5500 5600 7100 7200 7300 9400 9500 9600 9700 9800 10400 10500 10600 10700 1100 11000 11200 11300 11400 11600 12600 13500 13800 15000);
my %allow_long_salt = map { $_ => 1 } qw (2500 4520 4521 5500 5600 7100 7200 7300 9400 9500 9600 9700 9800 10400 10500 10600 10700 1100 11000 11200 11300 11400 11600 12600 13500 13800 15000);
my @lotus_magic_table =
(
@ -3255,7 +3254,7 @@ sub passthrough
{
chomp ($word_buf);
next if length ($word_buf) > 256;
next if length ($word_buf) > 31;
##
## gen salt
@ -3340,9 +3339,9 @@ sub passthrough
}
elsif ($mode == 2100)
{
next if length ($word_buf) >= 256;
next if length ($word_buf) > 13;
my $salt_len = get_random_num (1, 256);
my $salt_len = get_random_num (1, 19);
$tmp_hash = gen_hash ($mode, $word_buf, substr ($salt_buf, 0, $salt_len));
}
@ -3807,7 +3806,7 @@ sub single
}
elsif ($mode == 111 || $mode == 122 || $mode == 125 || $mode == 131 || $mode == 132 || $mode == 400 || $mode == 500 || $mode == 1600 || $mode == 1722 || $mode == 1731 || $mode == 6300 || $mode == 7900 || $mode == 8100 || $mode == 11100)
{
for (my $i = 1; $i < 256; $i++)
for (my $i = 1; $i < 32; $i++)
{
if ($len != 0)
{
@ -3879,9 +3878,9 @@ sub single
}
elsif ($mode == 2100)
{
my $salt_len = get_random_num (1, 256);
my $salt_len = get_random_num (1, 19);
for (my $i = 1; $i < 256; $i++)
for (my $i = 1; $i < 13; $i++)
{
if ($len != 0)
{
@ -8840,6 +8839,7 @@ sub dpapi_pbkdf2
return substr ($t, 0, $keylen);
}
sub rnd
{
my $mode = shift;

Loading…
Cancel
Save