Change bitsliced kernels from 3d to 2d invocation mode for slightly better performance

pull/2315/head
Jens Steube 4 years ago
parent 6b8f0da8e9
commit f96e35649d

@ -1998,14 +1998,8 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
@ -2035,6 +2029,8 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
u32 k26 = K26;
u32 k27 = K27;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
@ -2290,6 +2286,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
#include COMPARE_M
}
}
}
}
KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
@ -2314,70 +2311,70 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
const u32 s0 = digests_buf[0].digest_buf[0];
const u32 s1 = digests_buf[0].digest_buf[1];
#define S00 (((s0 >> 0) & 1) ? -1 : 0)
#define S01 (((s0 >> 1) & 1) ? -1 : 0)
#define S02 (((s0 >> 2) & 1) ? -1 : 0)
#define S03 (((s0 >> 3) & 1) ? -1 : 0)
#define S04 (((s0 >> 4) & 1) ? -1 : 0)
#define S05 (((s0 >> 5) & 1) ? -1 : 0)
#define S06 (((s0 >> 6) & 1) ? -1 : 0)
#define S07 (((s0 >> 7) & 1) ? -1 : 0)
#define S08 (((s0 >> 8) & 1) ? -1 : 0)
#define S09 (((s0 >> 9) & 1) ? -1 : 0)
#define S10 (((s0 >> 10) & 1) ? -1 : 0)
#define S11 (((s0 >> 11) & 1) ? -1 : 0)
#define S12 (((s0 >> 12) & 1) ? -1 : 0)
#define S13 (((s0 >> 13) & 1) ? -1 : 0)
#define S14 (((s0 >> 14) & 1) ? -1 : 0)
#define S15 (((s0 >> 15) & 1) ? -1 : 0)
#define S16 (((s0 >> 16) & 1) ? -1 : 0)
#define S17 (((s0 >> 17) & 1) ? -1 : 0)
#define S18 (((s0 >> 18) & 1) ? -1 : 0)
#define S19 (((s0 >> 19) & 1) ? -1 : 0)
#define S20 (((s0 >> 20) & 1) ? -1 : 0)
#define S21 (((s0 >> 21) & 1) ? -1 : 0)
#define S22 (((s0 >> 22) & 1) ? -1 : 0)
#define S23 (((s0 >> 23) & 1) ? -1 : 0)
#define S24 (((s0 >> 24) & 1) ? -1 : 0)
#define S25 (((s0 >> 25) & 1) ? -1 : 0)
#define S26 (((s0 >> 26) & 1) ? -1 : 0)
#define S27 (((s0 >> 27) & 1) ? -1 : 0)
#define S28 (((s0 >> 28) & 1) ? -1 : 0)
#define S29 (((s0 >> 29) & 1) ? -1 : 0)
#define S30 (((s0 >> 30) & 1) ? -1 : 0)
#define S31 (((s0 >> 31) & 1) ? -1 : 0)
#define S32 (((s1 >> 0) & 1) ? -1 : 0)
#define S33 (((s1 >> 1) & 1) ? -1 : 0)
#define S34 (((s1 >> 2) & 1) ? -1 : 0)
#define S35 (((s1 >> 3) & 1) ? -1 : 0)
#define S36 (((s1 >> 4) & 1) ? -1 : 0)
#define S37 (((s1 >> 5) & 1) ? -1 : 0)
#define S38 (((s1 >> 6) & 1) ? -1 : 0)
#define S39 (((s1 >> 7) & 1) ? -1 : 0)
#define S40 (((s1 >> 8) & 1) ? -1 : 0)
#define S41 (((s1 >> 9) & 1) ? -1 : 0)
#define S42 (((s1 >> 10) & 1) ? -1 : 0)
#define S43 (((s1 >> 11) & 1) ? -1 : 0)
#define S44 (((s1 >> 12) & 1) ? -1 : 0)
#define S45 (((s1 >> 13) & 1) ? -1 : 0)
#define S46 (((s1 >> 14) & 1) ? -1 : 0)
#define S47 (((s1 >> 15) & 1) ? -1 : 0)
#define S48 (((s1 >> 16) & 1) ? -1 : 0)
#define S49 (((s1 >> 17) & 1) ? -1 : 0)
#define S50 (((s1 >> 18) & 1) ? -1 : 0)
#define S51 (((s1 >> 19) & 1) ? -1 : 0)
#define S52 (((s1 >> 20) & 1) ? -1 : 0)
#define S53 (((s1 >> 21) & 1) ? -1 : 0)
#define S54 (((s1 >> 22) & 1) ? -1 : 0)
#define S55 (((s1 >> 23) & 1) ? -1 : 0)
#define S56 (((s1 >> 24) & 1) ? -1 : 0)
#define S57 (((s1 >> 25) & 1) ? -1 : 0)
#define S58 (((s1 >> 26) & 1) ? -1 : 0)
#define S59 (((s1 >> 27) & 1) ? -1 : 0)
#define S60 (((s1 >> 28) & 1) ? -1 : 0)
#define S61 (((s1 >> 29) & 1) ? -1 : 0)
#define S62 (((s1 >> 30) & 1) ? -1 : 0)
#define S63 (((s1 >> 31) & 1) ? -1 : 0)
const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0);
const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0);
const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0);
const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0);
const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0);
const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0);
const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0);
const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0);
const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0);
const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0);
const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0);
const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0);
const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0);
const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0);
const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0);
const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0);
const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0);
const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0);
const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0);
const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0);
const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0);
const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0);
const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0);
const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0);
const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0);
const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0);
const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0);
const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0);
const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0);
const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0);
const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0);
const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0);
const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0);
const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0);
const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0);
const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0);
const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0);
const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0);
const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0);
const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0);
const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0);
const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0);
const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0);
const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0);
const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0);
const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0);
const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0);
const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0);
const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0);
const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0);
const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0);
const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0);
const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0);
const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0);
const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0);
const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0);
const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0);
const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0);
const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0);
const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0);
const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0);
const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0);
const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0);
const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0);
/**
* base
@ -2450,14 +2447,8 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
@ -2487,6 +2478,8 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
u32 k26 = K26;
u32 k27 = K27;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
@ -2621,7 +2614,7 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D14 ^ S14;
tmpResult |= D15 ^ S15;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D16 ^ S16;
tmpResult |= D17 ^ S17;
@ -2640,7 +2633,7 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D30 ^ S30;
tmpResult |= D31 ^ S31;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D32 ^ S32;
tmpResult |= D33 ^ S33;
@ -2659,7 +2652,7 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D46 ^ S46;
tmpResult |= D47 ^ S47;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D48 ^ S48;
tmpResult |= D49 ^ S49;
@ -2678,11 +2671,12 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D62 ^ S62;
tmpResult |= D63 ^ S63;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
const u32 slice = ffz (tmpResult);
#ifdef KERNEL_STATIC
#include COMPARE_S
#endif
}
}

@ -1830,14 +1830,8 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
@ -1871,6 +1865,8 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ())
u32 k30 = K30;
u32 k31 = K31;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
@ -2131,6 +2127,7 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ())
#include COMPARE_M
}
}
}
}
KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
@ -2149,70 +2146,70 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
const u32 s0 = digests_buf[0].digest_buf[0];
const u32 s1 = digests_buf[0].digest_buf[1];
#define S00 (((s0 >> 0) & 1) ? -1 : 0)
#define S01 (((s0 >> 1) & 1) ? -1 : 0)
#define S02 (((s0 >> 2) & 1) ? -1 : 0)
#define S03 (((s0 >> 3) & 1) ? -1 : 0)
#define S04 (((s0 >> 4) & 1) ? -1 : 0)
#define S05 (((s0 >> 5) & 1) ? -1 : 0)
#define S06 (((s0 >> 6) & 1) ? -1 : 0)
#define S07 (((s0 >> 7) & 1) ? -1 : 0)
#define S08 (((s0 >> 8) & 1) ? -1 : 0)
#define S09 (((s0 >> 9) & 1) ? -1 : 0)
#define S10 (((s0 >> 10) & 1) ? -1 : 0)
#define S11 (((s0 >> 11) & 1) ? -1 : 0)
#define S12 (((s0 >> 12) & 1) ? -1 : 0)
#define S13 (((s0 >> 13) & 1) ? -1 : 0)
#define S14 (((s0 >> 14) & 1) ? -1 : 0)
#define S15 (((s0 >> 15) & 1) ? -1 : 0)
#define S16 (((s0 >> 16) & 1) ? -1 : 0)
#define S17 (((s0 >> 17) & 1) ? -1 : 0)
#define S18 (((s0 >> 18) & 1) ? -1 : 0)
#define S19 (((s0 >> 19) & 1) ? -1 : 0)
#define S20 (((s0 >> 20) & 1) ? -1 : 0)
#define S21 (((s0 >> 21) & 1) ? -1 : 0)
#define S22 (((s0 >> 22) & 1) ? -1 : 0)
#define S23 (((s0 >> 23) & 1) ? -1 : 0)
#define S24 (((s0 >> 24) & 1) ? -1 : 0)
#define S25 (((s0 >> 25) & 1) ? -1 : 0)
#define S26 (((s0 >> 26) & 1) ? -1 : 0)
#define S27 (((s0 >> 27) & 1) ? -1 : 0)
#define S28 (((s0 >> 28) & 1) ? -1 : 0)
#define S29 (((s0 >> 29) & 1) ? -1 : 0)
#define S30 (((s0 >> 30) & 1) ? -1 : 0)
#define S31 (((s0 >> 31) & 1) ? -1 : 0)
#define S32 (((s1 >> 0) & 1) ? -1 : 0)
#define S33 (((s1 >> 1) & 1) ? -1 : 0)
#define S34 (((s1 >> 2) & 1) ? -1 : 0)
#define S35 (((s1 >> 3) & 1) ? -1 : 0)
#define S36 (((s1 >> 4) & 1) ? -1 : 0)
#define S37 (((s1 >> 5) & 1) ? -1 : 0)
#define S38 (((s1 >> 6) & 1) ? -1 : 0)
#define S39 (((s1 >> 7) & 1) ? -1 : 0)
#define S40 (((s1 >> 8) & 1) ? -1 : 0)
#define S41 (((s1 >> 9) & 1) ? -1 : 0)
#define S42 (((s1 >> 10) & 1) ? -1 : 0)
#define S43 (((s1 >> 11) & 1) ? -1 : 0)
#define S44 (((s1 >> 12) & 1) ? -1 : 0)
#define S45 (((s1 >> 13) & 1) ? -1 : 0)
#define S46 (((s1 >> 14) & 1) ? -1 : 0)
#define S47 (((s1 >> 15) & 1) ? -1 : 0)
#define S48 (((s1 >> 16) & 1) ? -1 : 0)
#define S49 (((s1 >> 17) & 1) ? -1 : 0)
#define S50 (((s1 >> 18) & 1) ? -1 : 0)
#define S51 (((s1 >> 19) & 1) ? -1 : 0)
#define S52 (((s1 >> 20) & 1) ? -1 : 0)
#define S53 (((s1 >> 21) & 1) ? -1 : 0)
#define S54 (((s1 >> 22) & 1) ? -1 : 0)
#define S55 (((s1 >> 23) & 1) ? -1 : 0)
#define S56 (((s1 >> 24) & 1) ? -1 : 0)
#define S57 (((s1 >> 25) & 1) ? -1 : 0)
#define S58 (((s1 >> 26) & 1) ? -1 : 0)
#define S59 (((s1 >> 27) & 1) ? -1 : 0)
#define S60 (((s1 >> 28) & 1) ? -1 : 0)
#define S61 (((s1 >> 29) & 1) ? -1 : 0)
#define S62 (((s1 >> 30) & 1) ? -1 : 0)
#define S63 (((s1 >> 31) & 1) ? -1 : 0)
const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0);
const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0);
const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0);
const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0);
const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0);
const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0);
const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0);
const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0);
const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0);
const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0);
const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0);
const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0);
const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0);
const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0);
const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0);
const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0);
const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0);
const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0);
const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0);
const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0);
const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0);
const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0);
const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0);
const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0);
const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0);
const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0);
const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0);
const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0);
const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0);
const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0);
const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0);
const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0);
const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0);
const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0);
const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0);
const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0);
const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0);
const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0);
const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0);
const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0);
const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0);
const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0);
const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0);
const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0);
const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0);
const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0);
const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0);
const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0);
const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0);
const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0);
const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0);
const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0);
const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0);
const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0);
const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0);
const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0);
const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0);
const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0);
const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0);
const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0);
const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0);
const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0);
const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0);
const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0);
/**
* base
@ -2282,14 +2279,8 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
@ -2323,6 +2314,8 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
u32 k30 = K30;
u32 k31 = K31;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
@ -2462,7 +2455,7 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D14 ^ S14;
tmpResult |= D15 ^ S15;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D16 ^ S16;
tmpResult |= D17 ^ S17;
@ -2481,7 +2474,7 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D30 ^ S30;
tmpResult |= D31 ^ S31;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D32 ^ S32;
tmpResult |= D33 ^ S33;
@ -2500,7 +2493,7 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D46 ^ S46;
tmpResult |= D47 ^ S47;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D48 ^ S48;
tmpResult |= D49 ^ S49;
@ -2519,11 +2512,12 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D62 ^ S62;
tmpResult |= D63 ^ S63;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
const u32 slice = ffz (tmpResult);
#ifdef KERNEL_STATIC
#include COMPARE_S
#endif
}
}

@ -1903,78 +1903,6 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
u32 D62 = d62;
u32 D63 = d63;
/**
* digest
*/
const u32 s0 = digests_buf[0].digest_buf[0];
const u32 s1 = digests_buf[0].digest_buf[1];
#define S00 (((s0 >> 0) & 1) ? -1 : 0)
#define S01 (((s0 >> 1) & 1) ? -1 : 0)
#define S02 (((s0 >> 2) & 1) ? -1 : 0)
#define S03 (((s0 >> 3) & 1) ? -1 : 0)
#define S04 (((s0 >> 4) & 1) ? -1 : 0)
#define S05 (((s0 >> 5) & 1) ? -1 : 0)
#define S06 (((s0 >> 6) & 1) ? -1 : 0)
#define S07 (((s0 >> 7) & 1) ? -1 : 0)
#define S08 (((s0 >> 8) & 1) ? -1 : 0)
#define S09 (((s0 >> 9) & 1) ? -1 : 0)
#define S10 (((s0 >> 10) & 1) ? -1 : 0)
#define S11 (((s0 >> 11) & 1) ? -1 : 0)
#define S12 (((s0 >> 12) & 1) ? -1 : 0)
#define S13 (((s0 >> 13) & 1) ? -1 : 0)
#define S14 (((s0 >> 14) & 1) ? -1 : 0)
#define S15 (((s0 >> 15) & 1) ? -1 : 0)
#define S16 (((s0 >> 16) & 1) ? -1 : 0)
#define S17 (((s0 >> 17) & 1) ? -1 : 0)
#define S18 (((s0 >> 18) & 1) ? -1 : 0)
#define S19 (((s0 >> 19) & 1) ? -1 : 0)
#define S20 (((s0 >> 20) & 1) ? -1 : 0)
#define S21 (((s0 >> 21) & 1) ? -1 : 0)
#define S22 (((s0 >> 22) & 1) ? -1 : 0)
#define S23 (((s0 >> 23) & 1) ? -1 : 0)
#define S24 (((s0 >> 24) & 1) ? -1 : 0)
#define S25 (((s0 >> 25) & 1) ? -1 : 0)
#define S26 (((s0 >> 26) & 1) ? -1 : 0)
#define S27 (((s0 >> 27) & 1) ? -1 : 0)
#define S28 (((s0 >> 28) & 1) ? -1 : 0)
#define S29 (((s0 >> 29) & 1) ? -1 : 0)
#define S30 (((s0 >> 30) & 1) ? -1 : 0)
#define S31 (((s0 >> 31) & 1) ? -1 : 0)
#define S32 (((s1 >> 0) & 1) ? -1 : 0)
#define S33 (((s1 >> 1) & 1) ? -1 : 0)
#define S34 (((s1 >> 2) & 1) ? -1 : 0)
#define S35 (((s1 >> 3) & 1) ? -1 : 0)
#define S36 (((s1 >> 4) & 1) ? -1 : 0)
#define S37 (((s1 >> 5) & 1) ? -1 : 0)
#define S38 (((s1 >> 6) & 1) ? -1 : 0)
#define S39 (((s1 >> 7) & 1) ? -1 : 0)
#define S40 (((s1 >> 8) & 1) ? -1 : 0)
#define S41 (((s1 >> 9) & 1) ? -1 : 0)
#define S42 (((s1 >> 10) & 1) ? -1 : 0)
#define S43 (((s1 >> 11) & 1) ? -1 : 0)
#define S44 (((s1 >> 12) & 1) ? -1 : 0)
#define S45 (((s1 >> 13) & 1) ? -1 : 0)
#define S46 (((s1 >> 14) & 1) ? -1 : 0)
#define S47 (((s1 >> 15) & 1) ? -1 : 0)
#define S48 (((s1 >> 16) & 1) ? -1 : 0)
#define S49 (((s1 >> 17) & 1) ? -1 : 0)
#define S50 (((s1 >> 18) & 1) ? -1 : 0)
#define S51 (((s1 >> 19) & 1) ? -1 : 0)
#define S52 (((s1 >> 20) & 1) ? -1 : 0)
#define S53 (((s1 >> 21) & 1) ? -1 : 0)
#define S54 (((s1 >> 22) & 1) ? -1 : 0)
#define S55 (((s1 >> 23) & 1) ? -1 : 0)
#define S56 (((s1 >> 24) & 1) ? -1 : 0)
#define S57 (((s1 >> 25) & 1) ? -1 : 0)
#define S58 (((s1 >> 26) & 1) ? -1 : 0)
#define S59 (((s1 >> 27) & 1) ? -1 : 0)
#define S60 (((s1 >> 28) & 1) ? -1 : 0)
#define S61 (((s1 >> 29) & 1) ? -1 : 0)
#define S62 (((s1 >> 30) & 1) ? -1 : 0)
#define S63 (((s1 >> 31) & 1) ? -1 : 0)
/**
* base
*/
@ -2043,14 +1971,8 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
@ -2080,6 +2002,8 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
u32 k26 = K26;
u32 k27 = K27;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
@ -2269,6 +2193,7 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ())
#endif
}
}
}
}
KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
@ -2426,70 +2351,70 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
const u32 s0 = digests_buf[0].digest_buf[0];
const u32 s1 = digests_buf[0].digest_buf[1];
#define S00 (((s0 >> 0) & 1) ? -1 : 0)
#define S01 (((s0 >> 1) & 1) ? -1 : 0)
#define S02 (((s0 >> 2) & 1) ? -1 : 0)
#define S03 (((s0 >> 3) & 1) ? -1 : 0)
#define S04 (((s0 >> 4) & 1) ? -1 : 0)
#define S05 (((s0 >> 5) & 1) ? -1 : 0)
#define S06 (((s0 >> 6) & 1) ? -1 : 0)
#define S07 (((s0 >> 7) & 1) ? -1 : 0)
#define S08 (((s0 >> 8) & 1) ? -1 : 0)
#define S09 (((s0 >> 9) & 1) ? -1 : 0)
#define S10 (((s0 >> 10) & 1) ? -1 : 0)
#define S11 (((s0 >> 11) & 1) ? -1 : 0)
#define S12 (((s0 >> 12) & 1) ? -1 : 0)
#define S13 (((s0 >> 13) & 1) ? -1 : 0)
#define S14 (((s0 >> 14) & 1) ? -1 : 0)
#define S15 (((s0 >> 15) & 1) ? -1 : 0)
#define S16 (((s0 >> 16) & 1) ? -1 : 0)
#define S17 (((s0 >> 17) & 1) ? -1 : 0)
#define S18 (((s0 >> 18) & 1) ? -1 : 0)
#define S19 (((s0 >> 19) & 1) ? -1 : 0)
#define S20 (((s0 >> 20) & 1) ? -1 : 0)
#define S21 (((s0 >> 21) & 1) ? -1 : 0)
#define S22 (((s0 >> 22) & 1) ? -1 : 0)
#define S23 (((s0 >> 23) & 1) ? -1 : 0)
#define S24 (((s0 >> 24) & 1) ? -1 : 0)
#define S25 (((s0 >> 25) & 1) ? -1 : 0)
#define S26 (((s0 >> 26) & 1) ? -1 : 0)
#define S27 (((s0 >> 27) & 1) ? -1 : 0)
#define S28 (((s0 >> 28) & 1) ? -1 : 0)
#define S29 (((s0 >> 29) & 1) ? -1 : 0)
#define S30 (((s0 >> 30) & 1) ? -1 : 0)
#define S31 (((s0 >> 31) & 1) ? -1 : 0)
#define S32 (((s1 >> 0) & 1) ? -1 : 0)
#define S33 (((s1 >> 1) & 1) ? -1 : 0)
#define S34 (((s1 >> 2) & 1) ? -1 : 0)
#define S35 (((s1 >> 3) & 1) ? -1 : 0)
#define S36 (((s1 >> 4) & 1) ? -1 : 0)
#define S37 (((s1 >> 5) & 1) ? -1 : 0)
#define S38 (((s1 >> 6) & 1) ? -1 : 0)
#define S39 (((s1 >> 7) & 1) ? -1 : 0)
#define S40 (((s1 >> 8) & 1) ? -1 : 0)
#define S41 (((s1 >> 9) & 1) ? -1 : 0)
#define S42 (((s1 >> 10) & 1) ? -1 : 0)
#define S43 (((s1 >> 11) & 1) ? -1 : 0)
#define S44 (((s1 >> 12) & 1) ? -1 : 0)
#define S45 (((s1 >> 13) & 1) ? -1 : 0)
#define S46 (((s1 >> 14) & 1) ? -1 : 0)
#define S47 (((s1 >> 15) & 1) ? -1 : 0)
#define S48 (((s1 >> 16) & 1) ? -1 : 0)
#define S49 (((s1 >> 17) & 1) ? -1 : 0)
#define S50 (((s1 >> 18) & 1) ? -1 : 0)
#define S51 (((s1 >> 19) & 1) ? -1 : 0)
#define S52 (((s1 >> 20) & 1) ? -1 : 0)
#define S53 (((s1 >> 21) & 1) ? -1 : 0)
#define S54 (((s1 >> 22) & 1) ? -1 : 0)
#define S55 (((s1 >> 23) & 1) ? -1 : 0)
#define S56 (((s1 >> 24) & 1) ? -1 : 0)
#define S57 (((s1 >> 25) & 1) ? -1 : 0)
#define S58 (((s1 >> 26) & 1) ? -1 : 0)
#define S59 (((s1 >> 27) & 1) ? -1 : 0)
#define S60 (((s1 >> 28) & 1) ? -1 : 0)
#define S61 (((s1 >> 29) & 1) ? -1 : 0)
#define S62 (((s1 >> 30) & 1) ? -1 : 0)
#define S63 (((s1 >> 31) & 1) ? -1 : 0)
const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0);
const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0);
const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0);
const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0);
const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0);
const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0);
const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0);
const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0);
const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0);
const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0);
const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0);
const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0);
const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0);
const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0);
const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0);
const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0);
const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0);
const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0);
const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0);
const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0);
const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0);
const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0);
const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0);
const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0);
const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0);
const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0);
const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0);
const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0);
const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0);
const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0);
const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0);
const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0);
const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0);
const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0);
const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0);
const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0);
const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0);
const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0);
const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0);
const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0);
const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0);
const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0);
const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0);
const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0);
const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0);
const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0);
const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0);
const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0);
const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0);
const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0);
const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0);
const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0);
const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0);
const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0);
const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0);
const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0);
const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0);
const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0);
const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0);
const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0);
const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0);
const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0);
const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0);
const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0);
/**
* base
@ -2559,14 +2484,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
* inner loop
*/
#ifdef IS_CUDA
const u32 pc_pos = (blockIdx.y * blockDim.y) + threadIdx.y;
#else
const u32 pc_pos = get_global_id (1);
#endif
const u32 il_pos = pc_pos * 32;
for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32)
{
u32 k00 = K00;
u32 k01 = K01;
u32 k02 = K02;
@ -2596,6 +2515,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
u32 k26 = K26;
u32 k27 = K27;
const u32 pc_pos = il_pos / 32;
k00 |= words_buf_s[pc_pos].b[ 0];
k01 |= words_buf_s[pc_pos].b[ 1];
k02 |= words_buf_s[pc_pos].b[ 2];
@ -2664,7 +2585,7 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D14 ^ S14;
tmpResult |= D15 ^ S15;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D16 ^ S16;
tmpResult |= D17 ^ S17;
@ -2683,7 +2604,7 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D30 ^ S30;
tmpResult |= D31 ^ S31;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D32 ^ S32;
tmpResult |= D33 ^ S33;
@ -2702,7 +2623,7 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D46 ^ S46;
tmpResult |= D47 ^ S47;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
tmpResult |= D48 ^ S48;
tmpResult |= D49 ^ S49;
@ -2721,11 +2642,12 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ())
tmpResult |= D62 ^ S62;
tmpResult |= D63 ^ S63;
if (tmpResult == 0xffffffff) return;
if (tmpResult == 0xffffffff) continue;
const u32 slice = ffz (tmpResult);
#ifdef KERNEL_STATIC
#include COMPARE_S
#endif
}
}

@ -392,40 +392,40 @@ typedef enum opts_type
OPTS_TYPE_PT_GENERATE_LE = (1ULL << 9),
OPTS_TYPE_PT_GENERATE_BE = (1ULL << 10),
OPTS_TYPE_PT_NEVERCRACK = (1ULL << 11), // if we want all possible results
OPTS_TYPE_PT_BITSLICE = (1ULL << 12),
OPTS_TYPE_PT_ALWAYS_ASCII = (1ULL << 13),
OPTS_TYPE_PT_ALWAYS_HEXIFY = (1ULL << 14),
OPTS_TYPE_PT_LM = (1ULL << 15), // special handling: all lower, 7 max, ...
OPTS_TYPE_PT_HEX = (1ULL << 16), // input wordlist (and masks!) are always in hex
OPTS_TYPE_ST_UTF16LE = (1ULL << 17),
OPTS_TYPE_ST_UTF16BE = (1ULL << 18),
OPTS_TYPE_ST_UPPER = (1ULL << 19),
OPTS_TYPE_ST_LOWER = (1ULL << 20),
OPTS_TYPE_ST_ADD01 = (1ULL << 21),
OPTS_TYPE_ST_ADD02 = (1ULL << 22),
OPTS_TYPE_ST_ADD80 = (1ULL << 23),
OPTS_TYPE_ST_ADDBITS14 = (1ULL << 24),
OPTS_TYPE_ST_ADDBITS15 = (1ULL << 25),
OPTS_TYPE_ST_HEX = (1ULL << 26),
OPTS_TYPE_ST_BASE64 = (1ULL << 27),
OPTS_TYPE_ST_HASH_MD5 = (1ULL << 28),
OPTS_TYPE_HASH_COPY = (1ULL << 29),
OPTS_TYPE_HASH_SPLIT = (1ULL << 30),
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 31), // a kernel which is called each time normal _loop kernel finished.
OPTS_TYPE_PT_ALWAYS_ASCII = (1ULL << 12),
OPTS_TYPE_PT_ALWAYS_HEXIFY = (1ULL << 13),
OPTS_TYPE_PT_LM = (1ULL << 14), // special handling: all lower, 7 max, ...
OPTS_TYPE_PT_HEX = (1ULL << 15), // input wordlist (and masks!) are always in hex
OPTS_TYPE_ST_UTF16LE = (1ULL << 16),
OPTS_TYPE_ST_UTF16BE = (1ULL << 17),
OPTS_TYPE_ST_UPPER = (1ULL << 18),
OPTS_TYPE_ST_LOWER = (1ULL << 19),
OPTS_TYPE_ST_ADD01 = (1ULL << 20),
OPTS_TYPE_ST_ADD02 = (1ULL << 21),
OPTS_TYPE_ST_ADD80 = (1ULL << 22),
OPTS_TYPE_ST_ADDBITS14 = (1ULL << 23),
OPTS_TYPE_ST_ADDBITS15 = (1ULL << 24),
OPTS_TYPE_ST_HEX = (1ULL << 25),
OPTS_TYPE_ST_BASE64 = (1ULL << 26),
OPTS_TYPE_ST_HASH_MD5 = (1ULL << 27),
OPTS_TYPE_HASH_COPY = (1ULL << 28),
OPTS_TYPE_HASH_SPLIT = (1ULL << 29),
OPTS_TYPE_LOOP_EXTENDED = (1ULL << 30), // a kernel which is called each time normal _loop kernel finished.
// but unlike a hook kernel this kernel is called for every _loop iteration offset
OPTS_TYPE_HOOK12 = (1ULL << 32),
OPTS_TYPE_HOOK23 = (1ULL << 33),
OPTS_TYPE_INIT2 = (1ULL << 34),
OPTS_TYPE_LOOP2 = (1ULL << 35),
OPTS_TYPE_AUX1 = (1ULL << 36),
OPTS_TYPE_AUX2 = (1ULL << 37),
OPTS_TYPE_AUX3 = (1ULL << 38),
OPTS_TYPE_AUX4 = (1ULL << 39),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 40),
OPTS_TYPE_PREFERED_THREAD = (1ULL << 41), // some algorithms (complicated ones with many branches) benefit from this
OPTS_TYPE_PT_ADD06 = (1ULL << 42),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 43),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 44), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_HOOK12 = (1ULL << 31),
OPTS_TYPE_HOOK23 = (1ULL << 32),
OPTS_TYPE_INIT2 = (1ULL << 33),
OPTS_TYPE_LOOP2 = (1ULL << 34),
OPTS_TYPE_AUX1 = (1ULL << 35),
OPTS_TYPE_AUX2 = (1ULL << 36),
OPTS_TYPE_AUX3 = (1ULL << 37),
OPTS_TYPE_AUX4 = (1ULL << 38),
OPTS_TYPE_BINARY_HASHFILE = (1ULL << 39),
OPTS_TYPE_PREFERED_THREAD = (1ULL << 40), // some algorithms (complicated ones with many branches) benefit from this
OPTS_TYPE_PT_ADD06 = (1ULL << 41),
OPTS_TYPE_KEYBOARD_MAPPING = (1ULL << 42),
OPTS_TYPE_DEEP_COMP_KERNEL = (1ULL << 43), // if we have to iterate through each hash inside the comp kernel, for example if each hash has to be decrypted separately
OPTS_TYPE_TM_KERNEL = (1ULL << 44),
OPTS_TYPE_SUGGEST_KG = (1ULL << 45), // suggest keep guessing for modules the user maybe wants to use --keep-guessing
OPTS_TYPE_COPY_TMPS = (1ULL << 46), // if we want to use data from tmps buffer (for example get the PMK in WPA)
OPTS_TYPE_POTFILE_NOPASS = (1ULL << 47), // sometimes the password should not be printed to potfile

@ -2868,7 +2868,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
}
else
{
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
const u32 size_tm = device_param->size_tm;
@ -3430,7 +3430,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
{
const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
const status_ctx_t *status_ctx = hashcat_ctx->status_ctx;
const user_options_t *user_options = hashcat_ctx->user_options;
u64 kernel_threads = 0;
u64 dynamic_shared_mem = 0;
@ -3544,16 +3543,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
num_elements = CEILDIV (num_elements, kernel_threads);
if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
{
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 32, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
}
else
{
if (kern_run == KERN_RUN_1)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
@ -3581,7 +3570,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
}
if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
@ -3651,15 +3639,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
cl_event opencl_event;
if ((hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) && (user_options->attack_mode == ATTACK_MODE_BF))
{
const size_t global_work_size[3] = { num_elements, 32, 1 };
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
}
else
{
if (kern_run == KERN_RUN_1)
{
if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT)
@ -3688,7 +3667,6 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
const size_t local_work_size[3] = { kernel_threads, 1, 1 };
if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
}
if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
@ -8839,7 +8817,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (user_options->attack_mode == ATTACK_MODE_BF)
{
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
@ -9092,12 +9070,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
device_param->kernel_preferred_wgs_multiple_mp_r = device_param->cuda_warp_size;
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (user_options->attack_mode == ATTACK_MODE_BF)
{
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1;
//CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1;
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
if (hc_cuModuleGetFunction (hashcat_ctx, &device_param->cuda_function_mp, device_param->cuda_module_mp, "C_markov") == -1) return -1;
@ -9422,7 +9403,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
{
if (user_options->attack_mode == ATTACK_MODE_BF)
{
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type);
@ -9670,12 +9651,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_mp_r, &device_param->kernel_preferred_wgs_multiple_mp_r) == -1) return -1;
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (user_options->attack_mode == ATTACK_MODE_BF)
{
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]) == -1) return -1;
if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]) == -1) return -1;
}
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_mp, "C_markov", &device_param->opencl_kernel_mp) == -1) return -1;

@ -22,7 +22,7 @@ static const char *HASH_NAME = "descrypt, DES (Unix), Traditional DES";
static const u64 KERN_TYPE = 1500;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_BITSLICE;
| OPTS_TYPE_TM_KERNEL;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = NULL; // the self-test can't work because the salt is not part of the code at compile-time
static const char *ST_HASH = "8133vc.5rieNk";
@ -73,6 +73,13 @@ int module_build_plain_postprocess (MAYBE_UNUSED const hashconfig_t *hashconfig,
return src_len;
}
/*
 * Module hook that reports the upper limit for kernel threads of this hash-mode.
 *
 * None of the arguments are consulted (all are tagged MAYBE_UNUSED); the
 * result is always the fixed value 64.
 */
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // The cap of 64 is a performance only optimization.
  return 64;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
u32 kernel_loops_max = KERNEL_LOOPS_MAX;
@ -301,7 +308,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;

@ -23,7 +23,7 @@ static const u64 KERN_TYPE = 3000;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_UPPER
| OPTS_TYPE_PT_BITSLICE
| OPTS_TYPE_TM_KERNEL
| OPTS_TYPE_PT_ALWAYS_ASCII
| OPTS_TYPE_PT_LM
| OPTS_TYPE_HASH_SPLIT;

@ -22,7 +22,7 @@ static const char *HASH_NAME = "DES (PT = $salt, key = $pass)";
static const u64 KERN_TYPE = 14000;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_PT_BITSLICE
| OPTS_TYPE_TM_KERNEL
| OPTS_TYPE_ST_HEX;
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
static const char *ST_PASS = "hashcat1";

@ -201,7 +201,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
{
device_param->kernel_params_buf32[30] = 1;
if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL)
{
pw_t pw;

Loading…
Cancel
Save