mirror of
https://github.com/hashcat/hashcat.git
synced 2024-12-23 07:08:19 +00:00
Make words_buf_r in DES bitsliced kernels __constant
This commit is contained in:
parent
72fc708042
commit
a71c69983d
@ -1881,7 +1881,37 @@ DECLSPEC void transpose32c (u32 data[32])
|
|||||||
swap (data[30], data[31], 1, 0x55555555);
|
swap (data[30], data[31], 1, 0x55555555);
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void m01500m (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
//
|
||||||
|
// transpose bitslice mod : attention race conditions, need different buffers for *in and *out
|
||||||
|
//
|
||||||
|
|
||||||
|
__kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
|
||||||
|
{
|
||||||
|
const u64 gid = get_global_id (0);
|
||||||
|
|
||||||
|
const u32 block = gid / 32;
|
||||||
|
const u32 slice = gid % 32;
|
||||||
|
|
||||||
|
const u32 w0 = mod[gid];
|
||||||
|
|
||||||
|
const u32 w0s = (w0 << 1) & 0xfefefefe;
|
||||||
|
|
||||||
|
#ifdef _unroll
|
||||||
|
#pragma unroll
|
||||||
|
#endif
|
||||||
|
for (int i = 0, j = 0; i < 32; i += 8, j += 7)
|
||||||
|
{
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
@ -2253,7 +2283,7 @@ DECLSPEC void m01500m (__global pw_t *pws, __global const kernel_rule_t *rules_b
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void m01500s (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
__kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
@ -2641,69 +2671,3 @@ DECLSPEC void m01500s (__global pw_t *pws, __global const kernel_rule_t *rules_b
|
|||||||
|
|
||||||
#include COMPARE_S
|
#include COMPARE_S
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// transpose bitslice mod : attention race conditions, need different buffers for *in and *out
|
|
||||||
//
|
|
||||||
|
|
||||||
__kernel void m01500_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
|
|
||||||
{
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
|
|
||||||
const u32 block = gid / 32;
|
|
||||||
const u32 slice = gid % 32;
|
|
||||||
|
|
||||||
const u32 w0 = mod[gid];
|
|
||||||
|
|
||||||
const u32 w0s = (w0 << 1) & 0xfefefefe;
|
|
||||||
|
|
||||||
#ifdef _unroll
|
|
||||||
#pragma unroll
|
|
||||||
#endif
|
|
||||||
for (int i = 0, j = 0; i < 32; i += 8, j += 7)
|
|
||||||
{
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void m01500_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* base
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
if (gid >= gid_max) return;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* main
|
|
||||||
*/
|
|
||||||
|
|
||||||
m01500m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void m01500_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* base
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
if (gid >= gid_max) return;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* main
|
|
||||||
*/
|
|
||||||
|
|
||||||
m01500s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
|
||||||
}
|
|
||||||
|
@ -1726,7 +1726,33 @@ DECLSPEC void transpose32c (u32 data[32])
|
|||||||
swap (data[30], data[31], 1, 0x55555555);
|
swap (data[30], data[31], 1, 0x55555555);
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void m03000m (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
//
|
||||||
|
// transpose bitslice mod : attention race conditions, need different buffers for *in and *out
|
||||||
|
//
|
||||||
|
|
||||||
|
__kernel void m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
|
||||||
|
{
|
||||||
|
const u64 gid = get_global_id (0);
|
||||||
|
|
||||||
|
const u32 block = gid / 32;
|
||||||
|
const u32 slice = gid % 32;
|
||||||
|
|
||||||
|
const u32 w0 = mod[gid];
|
||||||
|
|
||||||
|
for (int i = 0; i < 32; i += 8)
|
||||||
|
{
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
@ -2098,7 +2124,7 @@ DECLSPEC void m03000m (__global pw_t *pws, __global const kernel_rule_t *rules_b
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void m03000s (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
__kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
@ -2486,65 +2512,3 @@ DECLSPEC void m03000s (__global pw_t *pws, __global const kernel_rule_t *rules_b
|
|||||||
|
|
||||||
#include COMPARE_S
|
#include COMPARE_S
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// transpose bitslice mod : attention race conditions, need different buffers for *in and *out
|
|
||||||
//
|
|
||||||
|
|
||||||
__kernel void m03000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
|
|
||||||
{
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
|
|
||||||
const u32 block = gid / 32;
|
|
||||||
const u32 slice = gid % 32;
|
|
||||||
|
|
||||||
const u32 w0 = mod[gid];
|
|
||||||
|
|
||||||
for (int i = 0; i < 32; i += 8)
|
|
||||||
{
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void m03000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* base
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
if (gid >= gid_max) return;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* main
|
|
||||||
*/
|
|
||||||
|
|
||||||
m03000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void m03000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* base
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
if (gid >= gid_max) return;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* main
|
|
||||||
*/
|
|
||||||
|
|
||||||
m03000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
|
||||||
}
|
|
||||||
|
@ -1726,7 +1726,35 @@ DECLSPEC void transpose32c (u32 data[32])
|
|||||||
swap (data[30], data[31], 1, 0x55555555);
|
swap (data[30], data[31], 1, 0x55555555);
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void m14000m (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
//
|
||||||
|
// transpose bitslice mod : attention race conditions, need different buffers for *in and *out
|
||||||
|
//
|
||||||
|
|
||||||
|
__kernel void m14000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
|
||||||
|
{
|
||||||
|
const u64 gid = get_global_id (0);
|
||||||
|
|
||||||
|
const u32 block = gid / 32;
|
||||||
|
const u32 slice = gid % 32;
|
||||||
|
|
||||||
|
const u32 w0 = mod[gid];
|
||||||
|
|
||||||
|
#ifdef _unroll
|
||||||
|
#pragma unroll
|
||||||
|
#endif
|
||||||
|
for (int i = 0, j = 0; i < 32; i += 8, j += 7)
|
||||||
|
{
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice));
|
||||||
|
atomic_or (&words_buf_r[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void m14000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
@ -2234,7 +2262,7 @@ DECLSPEC void m14000m (__global pw_t *pws, __global const kernel_rule_t *rules_b
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC void m14000s (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset)
|
__kernel void m14000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __constant const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
@ -2686,67 +2714,3 @@ DECLSPEC void m14000s (__global pw_t *pws, __global const kernel_rule_t *rules_b
|
|||||||
|
|
||||||
#include COMPARE_S
|
#include COMPARE_S
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// transpose bitslice mod : attention race conditions, need different buffers for *in and *out
|
|
||||||
//
|
|
||||||
|
|
||||||
__kernel void m14000_tm (__global u32 *mod, __global bs_word_t *words_buf_r)
|
|
||||||
{
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
|
|
||||||
const u32 block = gid / 32;
|
|
||||||
const u32 slice = gid % 32;
|
|
||||||
|
|
||||||
const u32 w0 = mod[gid];
|
|
||||||
|
|
||||||
#ifdef _unroll
|
|
||||||
#pragma unroll
|
|
||||||
#endif
|
|
||||||
for (int i = 0, j = 0; i < 32; i += 8, j += 7)
|
|
||||||
{
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice));
|
|
||||||
atomic_or (&words_buf_r[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void m14000_mxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* base
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
if (gid >= gid_max) return;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* main
|
|
||||||
*/
|
|
||||||
|
|
||||||
m14000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void m14000_sxx (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bs_word_t *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max)
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* base
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 gid = get_global_id (0);
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
if (gid >= gid_max) return;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* main
|
|
||||||
*/
|
|
||||||
|
|
||||||
m14000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset);
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user